// reimburseSweepCredit records that unusableBytes bytes of a
// just-allocated span are not available for object allocation. This
// offsets the worst-case charge performed by deductSweepCredit.
func reimburseSweepCredit(unusableBytes uintptr) {
	if mheap_.sweepPagesPerByte == 0 {
		// Nobody cares about the credit. Avoid the atomic.
		return
	}
	atomic.Xadd64(&mheap_.spanBytesAlloc, -int64(unusableBytes))
}
// If gcBlackenPromptly is true we are in the second mark phase so we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}
	heapBitsForAddr(obj).setMarked()
	atomic.Xadd64(&work.bytesMarked, int64(size))
}
// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrier
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1.ptr(); wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf, 212)
		} else {
			putfull(wbuf, 214)
		}
		w.wbuf1 = 0

		wbuf = w.wbuf2.ptr()
		if wbuf.nobj == 0 {
			putempty(wbuf, 218)
		} else {
			putfull(wbuf, 220)
		}
		w.wbuf2 = 0
	}

	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}
// Variant of sync/atomic's TestUnaligned64:
func TestUnaligned64(t *testing.T) {
	// Unaligned 64-bit atomics on 32-bit systems are
	// a continual source of pain. Test that on 32-bit systems they crash
	// instead of failing silently.
	switch runtime.GOARCH {
	default:
		if unsafe.Sizeof(int(0)) != 4 {
			t.Skip("test only runs on 32-bit systems")
		}
	case "amd64p32":
		// amd64p32 can handle unaligned atomics.
		t.Skipf("test not needed on %v", runtime.GOARCH)
	}

	x := make([]uint32, 4)
	up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned
	p64 := (*int64)(unsafe.Pointer(&x[1]))   // misaligned

	shouldPanic(t, "Load64", func() { atomic.Load64(up64) })
	shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) })
	shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) })
	shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) })
	shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
	shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
// Atomically decreases a given *system* memory stat. Same comments as
// mSysStatInc apply.
//go:nosplit
func mSysStatDec(sysStat *uint64, n uintptr) {
	if sys.BigEndian != 0 {
		atomic.Xadd64(sysStat, -int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n {
		print("runtime: stat underflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}
// Atomically increases a given *system* memory stat. We are counting on this
// stat never overflowing a uintptr, so this function must only be used for
// system memory stats.
//
// The current implementation for little endian architectures is based on
// xadduintptr(), which is less than ideal: xadd64() should really be used.
// Using xadduintptr() is a stop-gap solution until arm supports xadd64() that
// doesn't use locks. (Locks are a problem as they require a valid G, which
// restricts their usability.)
//
// A side-effect of using xadduintptr() is that we need to check for
// overflow errors.
//go:nosplit
func mSysStatInc(sysStat *uint64, n uintptr) {
	if _BigEndian != 0 {
		atomic.Xadd64(sysStat, int64(n))
		return
	}
	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n {
		print("runtime: stat overflow: val ", val, ", n ", n, "\n")
		exit(2)
	}
}
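// Illustrative standalone sketch (not runtime code; names below are hypothetical):
// why the "val < n" check after an unsigned atomic add detects overflow. If the
// counter wraps around, the post-add value is necessarily smaller than the
// increment that was just added, which can never happen otherwise.
package main

import (
	"fmt"
	"sync/atomic"
)

func addChecked(stat *uint64, n uint64) {
	if val := atomic.AddUint64(stat, n); val < n {
		fmt.Printf("stat overflow: val %d, n %d\n", val, n)
		return
	}
	fmt.Printf("ok: stat is now %d\n", atomic.LoadUint64(stat))
}

func main() {
	var stat uint64 = ^uint64(0) - 10 // counter close to the top of its range
	addChecked(&stat, 5)              // no wraparound: val >= n
	addChecked(&stat, 100)            // wraps around: val < n, overflow reported
}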
// deductSweepCredit deducts sweep credit for allocating a span of
// size spanBytes. This must be performed *before* the span is
// allocated to ensure the system has enough credit. If necessary, it
// performs sweeping to prevent going into debt. If the caller will
// also sweep pages (e.g., for a large allocation), it can pass a
// non-zero callerSweepPages to leave that many pages unswept.
//
// deductSweepCredit makes a worst-case assumption that all spanBytes
// bytes of the ultimately allocated span will be available for object
// allocation. The caller should call reimburseSweepCredit if that
// turns out not to be the case once the span is allocated.
//
// deductSweepCredit is the core of the "proportional sweep" system.
// It uses statistics gathered by the garbage collector to perform
// enough sweeping so that all pages are swept during the concurrent
// sweep phase between GC cycles.
//
// mheap_ must NOT be locked.
func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
	if mheap_.sweepPagesPerByte == 0 {
		// Proportional sweep is done or disabled.
		return
	}

	// Account for this span allocation.
	spanBytesAlloc := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes))

	// Fix debt if necessary.
	pagesOwed := int64(mheap_.sweepPagesPerByte * float64(spanBytesAlloc))
	for pagesOwed-int64(atomic.Load64(&mheap_.pagesSwept)) > int64(callerSweepPages) {
		if gosweepone() == ^uintptr(0) {
			mheap_.sweepPagesPerByte = 0
			break
		}
	}
}
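// Illustrative standalone sketch (hypothetical numbers, not runtime code) of the
// proportional-sweep invariant that deductSweepCredit maintains: after every span
// allocation, the pages swept so far must be at least
// sweepPagesPerByte * spanBytesAlloc, so sweeping keeps pace with allocation and
// finishes before the next GC cycle is due.
package main

import "fmt"

func main() {
	sweepPagesPerByte := 0.0005 // pages owed per byte allocated (made-up ratio)
	var spanBytesAlloc, pagesSwept uint64

	deduct := func(spanBytes uint64) {
		spanBytesAlloc += spanBytes
		pagesOwed := int64(sweepPagesPerByte * float64(spanBytesAlloc))
		for int64(pagesSwept) < pagesOwed {
			pagesSwept++ // stand-in for the real loop, which calls gosweepone()
		}
		fmt.Printf("allocated %7d bytes total: owe %3d pages, swept %3d\n",
			spanBytesAlloc, pagesOwed, pagesSwept)
	}

	deduct(64 << 10)  // 64 KiB span
	deduct(128 << 10) // 128 KiB span
	deduct(1 << 20)   // 1 MiB span
}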
// Return span from an MCache.
func (c *mcentral) uncacheSpan(s *mspan) {
	lock(&c.lock)

	s.incache = false

	if s.allocCount == 0 {
		throw("uncaching span but s.allocCount == 0")
	}

	cap := int32((s.npages << _PageShift) / s.elemsize)
	n := cap - int32(s.allocCount)
	if n > 0 {
		c.empty.remove(s)
		c.nonempty.insert(s)
		// mCentral_CacheSpan conservatively counted
		// unallocated slots in heap_live. Undo this.
		atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
	}
	unlock(&c.lock)
}
func testAtomic64() {
	test_z64 = 42
	test_x64 = 0
	prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
	prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
	if atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 0 {
		throw("cas64 failed")
	}
	test_x64 = 42
	if !atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 42 || test_z64 != 1 {
		throw("cas64 failed")
	}
	if atomic.Load64(&test_z64) != 1 {
		throw("load64 failed")
	}
	atomic.Store64(&test_z64, (1<<40)+1)
	if atomic.Load64(&test_z64) != (1<<40)+1 {
		throw("store64 failed")
	}
	if atomic.Xadd64(&test_z64, (1<<40)+1) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Load64(&test_z64) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Xchg64(&test_z64, (3<<40)+3) != (2<<40)+2 {
		throw("xchg64 failed")
	}
	if atomic.Load64(&test_z64) != (3<<40)+3 {
		throw("xchg64 failed")
	}
}
// oneNewExtraM allocates an m and puts it on the extra list.
func oneNewExtraM() {
	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	var g0SP unsafe.Pointer
	var g0SPSize uintptr
	mp := allocm(nil, true, &g0SP, &g0SPSize)
	gp := malg(true, false, nil, nil)
	gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary
	gp.gcRescan = -1

	// malg returns status as Gidle, change to Gdead before adding to allg
	// where GC will see it.
	// gccgo uses Gdead here, not Gsyscall, because the split
	// stack context is not initialized.
	casgstatus(gp, _Gidle, _Gdead)
	gp.m = mp
	mp.curg = gp
	mp.locked = _LockInternal
	mp.lockedg = gp
	gp.lockedm = mp
	gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
	if raceenabled {
		gp.racectx = racegostart(funcPC(newextram))
	}
	// put on allg for garbage collector
	allgadd(gp)

	// The context for gp will be set up in needm.
	// Here we need to set the context for g0.
	makeGContext(mp.g0, g0SP, g0SPSize)

	// Add m to the extra list.
	mnext := lockextra(true)
	mp.schedlink.set(mnext)
	unlockextra(mp)
}
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					end = 0
					begin = end
					break
				}
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}
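// Illustrative standalone sketch (not runtime code) of the packed position word
// used by parfordo: the low 32 bits hold the next unprocessed index and the high
// 32 bits hold the end of the range, so a single 64-bit CAS can atomically split
// a victim's remaining iterations when stealing.
package main

import (
	"fmt"
	"sync/atomic"
)

func pack(begin, end uint32) uint64      { return uint64(begin) | uint64(end)<<32 }
func unpack(pos uint64) (uint32, uint32) { return uint32(pos), uint32(pos >> 32) }

func main() {
	victim := pack(10, 100) // victim still owns iterations [10, 100)
	for {
		pos := atomic.LoadUint64(&victim)
		begin, end := unpack(pos)
		mid := begin + (end-begin)/2
		// Leave [begin, mid) with the victim and take [mid, end) for ourselves,
		// mirroring what parfordo does with begin2.
		if atomic.CompareAndSwapUint64(&victim, pos, pack(begin, mid)) {
			fmt.Printf("stole [%d, %d); victim keeps [%d, %d)\n", mid, end, begin, mid)
			break
		}
	}
}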
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//TODO go:nowritebarrier
func (s *mspan) sweep(preserve bool) bool {
	// It's critical that we enter this function with preemption disabled,
	// GC must not start while we are in the middle of this function.
	_g_ := getg()
	if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
		throw("MSpan_Sweep: m is not locked")
	}
	sweepgen := mheap_.sweepgen
	if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
		print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
		throw("MSpan_Sweep: bad span state")
	}

	if trace.enabled {
		traceGCSweepStart()
	}

	atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))

	cl := s.sizeclass
	size := s.elemsize
	res := false
	nfree := 0

	c := _g_.m.mcache
	freeToHeap := false

	// The allocBits indicate which unmarked objects don't need to be
	// processed since they were free at the end of the last GC cycle
	// and were not allocated since then.
	// If the allocBits index is >= s.freeindex and the bit
	// is not marked then the object remains unallocated
	// since the last GC.
	// This situation is analogous to being on a freelist.

	// Unlink & free special records for any objects we're about to free.
	// Two complications here:
	// 1. An object can have both finalizer and profile special records.
	//    In such case we need to queue finalizer for execution,
	//    mark the object as live and preserve the profile special.
	// 2. A tiny object can have several finalizers setup for different offsets.
	//    If such object is not marked, we need to queue all finalizers at once.
	// Both 1 and 2 are possible at the same time.
	specialp := &s.specials
	special := *specialp
	for special != nil {
		// A finalizer can be set for an inner byte of an object, find object beginning.
		objIndex := uintptr(special.offset) / size
		p := s.base() + objIndex*size
		mbits := s.markBitsForIndex(objIndex)
		if !mbits.isMarked() {
			// This object is not marked and has at least one special record.
			// Pass 1: see if it has at least one finalizer.
			hasFin := false
			endOffset := p - s.base() + size
			for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
				if tmp.kind == _KindSpecialFinalizer {
					// Stop freeing of object if it has a finalizer.
					mbits.setMarkedNonAtomic()
					hasFin = true
					break
				}
			}
			// Pass 2: queue all finalizers _or_ handle profile record.
			for special != nil && uintptr(special.offset) < endOffset {
				// Find the exact byte for which the special was setup
				// (as opposed to object beginning).
				p := s.base() + uintptr(special.offset)
				if special.kind == _KindSpecialFinalizer || !hasFin {
					// Splice out special record.
					y := special
					special = special.next
					*specialp = special
					freespecial(y, unsafe.Pointer(p), size)
				} else {
					// This is profile record, but the object has finalizers (so kept alive).
					// Keep special record.
					specialp = &special.next
					special = *specialp
				}
			}
		} else {
			// object is still live: keep special record
			specialp = &special.next
			special = *specialp
		}
	}

	if debug.allocfreetrace != 0 || raceenabled || msanenabled {
		// Find all newly freed objects. This doesn't have to be
		// efficient; allocfreetrace has massive overhead.
		mbits := s.markBitsForBase()
		abits := s.allocBitsForIndex(0)
		for i := uintptr(0); i < s.nelems; i++ {
			if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
				x := s.base() + i*s.elemsize
				if debug.allocfreetrace != 0 {
					tracefree(unsafe.Pointer(x), size)
				}
				if raceenabled {
					racefree(unsafe.Pointer(x), size)
				}
				if msanenabled {
					msanfree(unsafe.Pointer(x), size)
				}
			}
			mbits.advance()
			abits.advance()
		}
	}

	// Count the number of free objects in this span.
	nfree = s.countFree()
	if cl == 0 && nfree != 0 {
		s.needzero = 1
		freeToHeap = true
	}
	nalloc := uint16(s.nelems) - uint16(nfree)
	nfreed := s.allocCount - nalloc
	if nalloc > s.allocCount {
		print("runtime: nelems=", s.nelems, " nfree=", nfree, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
		throw("sweep increased allocation count")
	}

	s.allocCount = nalloc
	wasempty := s.nextFreeIndex() == s.nelems
	s.freeindex = 0 // reset allocation index to start of span.

	// gcmarkBits becomes the allocBits.
	// get a fresh cleared gcmarkBits in preparation for next GC
	s.allocBits = s.gcmarkBits
	s.gcmarkBits = newMarkBits(s.nelems)

	// Initialize alloc bits cache.
	s.refillAllocCache(0)

	// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
	// because of the potential for a concurrent free/SetFinalizer.
	// But we need to set it before we make the span available for allocation
	// (return it to heap or mcentral), because allocation code assumes that a
	// span is already swept if available for allocation.
	if freeToHeap || nfreed == 0 {
		// The span must be in our exclusive ownership until we update sweepgen,
		// check for potential races.
		if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
			print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
			throw("MSpan_Sweep: bad span state after sweep")
		}
		// Serialization point.
		// At this point the mark bits are cleared and allocation ready
		// to go so release the span.
		atomic.Store(&s.sweepgen, sweepgen)
	}

	if nfreed > 0 && cl != 0 {
		c.local_nsmallfree[cl] += uintptr(nfreed)
		res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty)
		// MCentral_FreeSpan updates sweepgen
	} else if freeToHeap {
		// Free large span to heap

		// NOTE(rsc,dvyukov): The original implementation of efence
		// in CL 22060046 used SysFree instead of SysFault, so that
		// the operating system would eventually give the memory
		// back to us again, so that an efence program could run
		// longer without running out of memory. Unfortunately,
		// calling SysFree here without any kind of adjustment of the
		// heap data structures means that when the memory does
		// come back to us, we have the wrong metadata for it, either in
		// the MSpan structures or in the garbage collection bitmap.
		// Using SysFault here means that the program will run out of
		// memory fairly quickly in efence mode, but at least it won't
		// have mysterious crashes due to confused memory reuse.
		// It should be possible to switch back to SysFree if we also
		// implement and then call some kind of MHeap_DeleteSpan.
		if debug.efence > 0 {
			s.limit = 0 // prevent mlookup from finding this span
			sysFault(unsafe.Pointer(s.base()), size)
		} else {
			mheap_.freeSpan(s, 1)
		}
		c.local_nlargefree++
		c.local_largefree += size
		res = true
	}
	if !res {
		// The span has been swept and is still in-use, so put
		// it on the swept in-use list.
		mheap_.sweepSpans[sweepgen/2%2].push(s)
	}
	if trace.enabled {
		traceGCSweepDone()
	}
	return res
}
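// Illustrative standalone sketch (hypothetical types, not runtime code) of the
// bitmap flip near the end of sweep above: the mark bits from this GC cycle
// simply become the allocation bits for the next cycle, so every object left
// unmarked is freed implicitly, without being touched individually.
package main

import "fmt"

type span struct {
	allocBits  []byte // 1 bit per object: allocated?
	gcmarkBits []byte // 1 bit per object: reached during mark?
	nelems     int
}

func (s *span) sweep() {
	s.allocBits = s.gcmarkBits                  // marked objects stay allocated
	s.gcmarkBits = make([]byte, (s.nelems+7)/8) // fresh, cleared mark bits for next GC
}

func main() {
	s := &span{nelems: 8}
	s.allocBits = []byte{0b11110110}  // objects currently allocated
	s.gcmarkBits = []byte{0b10100010} // objects the GC actually marked
	s.sweep()
	fmt.Printf("after sweep, allocBits = %08b (unmarked objects are now free)\n", s.allocBits[0])
}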
// Allocate a new span of npage pages from the heap for GC'd memory
// and record its size class in the HeapMap and HeapMapCache.
func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
	_g_ := getg()
	if _g_ != _g_.m.g0 {
		throw("_mheap_alloc not on g0 stack")
	}
	lock(&h.lock)

	// To prevent excessive heap growth, before allocating n pages
	// we need to sweep and reclaim at least n pages.
	if h.sweepdone == 0 {
		// TODO(austin): This tends to sweep a large number of
		// spans in order to find a few completely free spans
		// (for example, in the garbage benchmark, this sweeps
		// ~30x the number of pages it's trying to allocate).
		// If GC kept a bit for whether there were any marks
		// in a span, we could release these free spans
		// at the end of GC and eliminate this entirely.
		h.reclaim(npage)
	}

	// transfer stats from cache to global
	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
	_g_.m.mcache.local_scan = 0
	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
	_g_.m.mcache.local_tinyallocs = 0

	s := h.allocSpanLocked(npage)
	if s != nil {
		// Record span info, because gc needs to be
		// able to map interior pointer to containing span.
		atomic.Store(&s.sweepgen, h.sweepgen)
		s.state = _MSpanInUse
		s.allocCount = 0
		s.sizeclass = uint8(sizeclass)
		if sizeclass == 0 {
			s.elemsize = s.npages << _PageShift
			s.divShift = 0
			s.divMul = 0
			s.divShift2 = 0
			s.baseMask = 0
		} else {
			s.elemsize = uintptr(class_to_size[sizeclass])
			m := &class_to_divmagic[sizeclass]
			s.divShift = m.shift
			s.divMul = m.mul
			s.divShift2 = m.shift2
			s.baseMask = m.baseMask
		}

		// update stats, sweep lists
		h.pagesInUse += uint64(npage)
		if large {
			memstats.heap_objects++
			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
			// Swept spans are at the end of lists.
			if s.npages < uintptr(len(h.free)) {
				h.busy[s.npages].insertBack(s)
			} else {
				h.busylarge.insertBack(s)
			}
		}
	}
	// heap_scan and heap_live were updated.
	if gcBlackenEnabled != 0 {
		gcController.revise()
	}

	if trace.enabled {
		traceHeapAlloc()
	}

	// h_spans is accessed concurrently without synchronization
	// from other threads. Hence, there must be a store/store
	// barrier here to ensure the writes to h_spans above happen
	// before the caller can publish a pointer p to an object
	// allocated from s. As soon as this happens, the garbage
	// collector running on another processor could read p and
	// look up s in h_spans. The unlock acts as the barrier to
	// order these writes. On the read side, the data dependency
	// between p and the index in h_spans orders the reads.
	unlock(&h.lock)
	return s
}
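// Illustrative standalone sketch of why alloc_m records per-size-class division
// constants (divMul, divShift, divShift2, baseMask): dividing an offset by the
// element size can be replaced by a multiply and a shift using a precomputed
// reciprocal. This is NOT the runtime's exact divmagic encoding; the constants
// and sizes below are chosen for this sketch only. The trick is exact here
// because offsets within a small-object span are bounded.
package main

import "fmt"

func main() {
	sizes := []uint64{48, 112, 576, 1152, 9472} // hypothetical element sizes
	const maxOffset = 32 << 10                  // offsets within a span are small
	for _, d := range sizes {
		m := (uint64(1)<<32 + d - 1) / d // ceil(2^32 / d), the reciprocal
		ok := true
		for off := uint64(0); off < maxOffset; off += 8 {
			if (off*m)>>32 != off/d {
				ok = false
				break
			}
		}
		fmt.Printf("size %5d: (offset*%d)>>32 == offset/%d for all span offsets: %v\n", d, m, d, ok)
	}
}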
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//TODO go:nowritebarrier
func (s *mspan) sweep(preserve bool) bool {
	// It's critical that we enter this function with preemption disabled,
	// GC must not start while we are in the middle of this function.
	_g_ := getg()
	if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
		throw("MSpan_Sweep: m is not locked")
	}
	sweepgen := mheap_.sweepgen
	if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
		print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
		throw("MSpan_Sweep: bad span state")
	}

	if trace.enabled {
		traceGCSweepStart()
	}

	atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))

	cl := s.sizeclass
	size := s.elemsize
	res := false
	nfree := 0

	var head, end gclinkptr

	c := _g_.m.mcache
	freeToHeap := false

	// Mark any free objects in this span so we don't collect them.
	sstart := uintptr(s.start << _PageShift)
	for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
		if uintptr(link) < sstart || s.limit <= uintptr(link) {
			// Free list is corrupted.
			dumpFreeList(s)
			throw("free list corrupted")
		}
		heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
	}

	// Unlink & free special records for any objects we're about to free.
	// Two complications here:
	// 1. An object can have both finalizer and profile special records.
	//    In such case we need to queue finalizer for execution,
	//    mark the object as live and preserve the profile special.
	// 2. A tiny object can have several finalizers setup for different offsets.
	//    If such object is not marked, we need to queue all finalizers at once.
	// Both 1 and 2 are possible at the same time.
	specialp := &s.specials
	special := *specialp
	for special != nil {
		// A finalizer can be set for an inner byte of an object, find object beginning.
		p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
		hbits := heapBitsForAddr(p)
		if !hbits.isMarked() {
			// This object is not marked and has at least one special record.
			// Pass 1: see if it has at least one finalizer.
			hasFin := false
			endOffset := p - uintptr(s.start<<_PageShift) + size
			for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
				if tmp.kind == _KindSpecialFinalizer {
					// Stop freeing of object if it has a finalizer.
					hbits.setMarkedNonAtomic()
					hasFin = true
					break
				}
			}
			// Pass 2: queue all finalizers _or_ handle profile record.
			for special != nil && uintptr(special.offset) < endOffset {
				// Find the exact byte for which the special was setup
				// (as opposed to object beginning).
				p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
				if special.kind == _KindSpecialFinalizer || !hasFin {
					// Splice out special record.
					y := special
					special = special.next
					*specialp = special
					freespecial(y, unsafe.Pointer(p), size)
				} else {
					// This is profile record, but the object has finalizers (so kept alive).
					// Keep special record.
					specialp = &special.next
					special = *specialp
				}
			}
		} else {
			// object is still live: keep special record
			specialp = &special.next
			special = *specialp
		}
	}

	// Sweep through n objects of given size starting at p.
	// This thread owns the span now, so it can manipulate
	// the block bitmap without atomic operations.
	size, n, _ := s.layout()
	heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
		// At this point we know that we are looking at garbage object
		// that needs to be collected.
		if debug.allocfreetrace != 0 {
			tracefree(unsafe.Pointer(p), size)
		}
		if msanenabled {
			msanfree(unsafe.Pointer(p), size)
		}

		// Reset to allocated+noscan.
		if cl == 0 {
			// Free large span.
			if preserve {
				throw("can't preserve large span")
			}
			heapBitsForSpan(p).initSpan(s.layout())
			s.needzero = 1

			// Free the span after heapBitsSweepSpan
			// returns, since it's not done with the span.
			freeToHeap = true
		} else {
			// Free small object.
			if size > 2*ptrSize {
				*(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
			} else if size > ptrSize {
				*(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
			}
			if head.ptr() == nil {
				head = gclinkptr(p)
			} else {
				end.ptr().next = gclinkptr(p)
			}
			end = gclinkptr(p)
			end.ptr().next = gclinkptr(0x0bade5)
			nfree++
		}
	})

	// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
	// because of the potential for a concurrent free/SetFinalizer.
	// But we need to set it before we make the span available for allocation
	// (return it to heap or mcentral), because allocation code assumes that a
	// span is already swept if available for allocation.
	if freeToHeap || nfree == 0 {
		// The span must be in our exclusive ownership until we update sweepgen,
		// check for potential races.
		if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
			print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
			throw("MSpan_Sweep: bad span state after sweep")
		}
		atomic.Store(&s.sweepgen, sweepgen)
	}
	if nfree > 0 {
		c.local_nsmallfree[cl] += uintptr(nfree)
		res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
		// MCentral_FreeSpan updates sweepgen
	} else if freeToHeap {
		// Free large span to heap

		// NOTE(rsc,dvyukov): The original implementation of efence
		// in CL 22060046 used SysFree instead of SysFault, so that
		// the operating system would eventually give the memory
		// back to us again, so that an efence program could run
		// longer without running out of memory. Unfortunately,
		// calling SysFree here without any kind of adjustment of the
		// heap data structures means that when the memory does
		// come back to us, we have the wrong metadata for it, either in
		// the MSpan structures or in the garbage collection bitmap.
		// Using SysFault here means that the program will run out of
		// memory fairly quickly in efence mode, but at least it won't
		// have mysterious crashes due to confused memory reuse.
		// It should be possible to switch back to SysFree if we also
		// implement and then call some kind of MHeap_DeleteSpan.
		if debug.efence > 0 {
			s.limit = 0 // prevent mlookup from finding this span
			sysFault(unsafe.Pointer(uintptr(s.start<<_PageShift)), size)
		} else {
			mheap_.freeSpan(s, 1)
		}
		c.local_nlargefree++
		c.local_largefree += size
		res = true
	}
	if trace.enabled {
		traceGCSweepDone()
	}
	return res
}
// Allocate a span to use in an MCache.
func (c *mcentral) cacheSpan() *mspan {
	// Deduct credit for this span allocation and sweep if necessary.
	spanBytes := uintptr(class_to_allocnpages[c.sizeclass]) * _PageSize
	deductSweepCredit(spanBytes, 0)

	lock(&c.lock)
	sg := mheap_.sweepgen
retry:
	var s *mspan
	for s = c.nonempty.first; s != nil; s = s.next {
		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
			c.nonempty.remove(s)
			c.empty.insertBack(s)
			unlock(&c.lock)
			s.sweep(true)
			goto havespan
		}
		if s.sweepgen == sg-1 {
			// the span is being swept by background sweeper, skip
			continue
		}
		// we have a nonempty span that does not require sweeping, allocate from it
		c.nonempty.remove(s)
		c.empty.insertBack(s)
		unlock(&c.lock)
		goto havespan
	}

	for s = c.empty.first; s != nil; s = s.next {
		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
			// we have an empty span that requires sweeping,
			// sweep it and see if we can free some space in it
			c.empty.remove(s)
			// swept spans are at the end of the list
			c.empty.insertBack(s)
			unlock(&c.lock)
			s.sweep(true)
			freeIndex := s.nextFreeIndex()
			if freeIndex != s.nelems {
				s.freeindex = freeIndex
				goto havespan
			}
			lock(&c.lock)
			// the span is still empty after sweep
			// it is already in the empty list, so just retry
			goto retry
		}
		if s.sweepgen == sg-1 {
			// the span is being swept by background sweeper, skip
			continue
		}
		// already swept empty span,
		// all subsequent ones must also be either swept or in process of sweeping
		break
	}
	unlock(&c.lock)

	// Replenish central list if empty.
	s = c.grow()
	if s == nil {
		return nil
	}
	lock(&c.lock)
	c.empty.insertBack(s)
	unlock(&c.lock)

	// At this point s is a non-empty span, queued at the end of the empty list,
	// c is unlocked.
havespan:
	cap := int32((s.npages << _PageShift) / s.elemsize)
	n := cap - int32(s.allocCount)
	if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
		throw("span has no free objects")
	}
	usedBytes := uintptr(s.allocCount) * s.elemsize
	if usedBytes > 0 {
		reimburseSweepCredit(usedBytes)
	}
	atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
	if trace.enabled {
		// heap_live changed.
		traceHeapAlloc()
	}
	if gcBlackenEnabled != 0 {
		// heap_live changed.
		gcController.revise()
	}
	s.incache = true
	freeByteBase := s.freeindex &^ (64 - 1)
	whichByte := freeByteBase / 8
	// Init alloc bits cache.
	s.refillAllocCache(whichByte)

	// Adjust the allocCache so that s.freeindex corresponds to the low bit in
	// s.allocCache.
	s.allocCache >>= s.freeindex % 64

	return s
}
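// Illustrative standalone sketch (hypothetical encoding, not runtime code) of the
// allocCache adjustment at the end of cacheSpan: the cache is a 64-bit window of
// the allocation bitmap, and shifting it right by freeindex%64 puts the slot at
// freeindex into bit 0, so the next free slot is one trailing-zeros count away.
// The sketch assumes free slots are represented by 1 bits in the cache, which is
// one common encoding; the runtime's refillAllocCache is not shown here.
package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Bits 0..63 describe objects 0..63; a 1 bit means "free".
	var allocCache uint64 = 0xFFFFFFFFFFFFF000 // objects 0..11 allocated, 12..63 free
	freeindex := uint64(5)

	// Align the cache to freeindex, as cacheSpan does with s.allocCache >>= s.freeindex % 64.
	allocCache >>= freeindex % 64

	// The next free object is freeindex plus the number of still-allocated slots ahead of it.
	next := freeindex + uint64(bits.TrailingZeros64(allocCache))
	fmt.Println("next free object index:", next) // 12
}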