// tracestamp returns a consistent sequence number, time stamp pair
// for use in a trace. We need to make sure that time stamp ordering
// (assuming synchronized CPUs) and sequence ordering match.
// To do that, we increment traceseq, grab ticks, and increment traceseq again.
// We treat odd traceseq as a sign that another thread is in the middle
// of the sequence and spin until it is done.
// Not splitting stack to avoid preemption, just in case the call sites
// that used to call xadd64 and cputicks are sensitive to that.
//go:nosplit
func tracestamp() (seq uint64, ts int64) {
	seq = atomic.Load64(&traceseq)
	for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) {
		seq = atomic.Load64(&traceseq)
	}
	ts = cputicks()
	atomic.Store64(&traceseq, seq+2)
	return seq >> 1, ts
}
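// Example (not part of the original sources): a standalone sketch of the
// same odd/even "seqlock"-style protocol, written against the public
// sync/atomic package. The names seq and stamp are hypothetical, and
// time.Now stands in for cputicks; the point is that the timestamp is
// taken while seq is held odd, so sequence order and timestamp order agree.
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// seq plays the role of traceseq: even means free, odd means a writer
// is between its two increments.
var seq uint64

func stamp() (uint64, int64) {
	s := atomic.LoadUint64(&seq)
	for s&1 != 0 || !atomic.CompareAndSwapUint64(&seq, s, s+1) {
		s = atomic.LoadUint64(&seq)
	}
	ts := time.Now().UnixNano() // stand-in for cputicks
	atomic.StoreUint64(&seq, s+2)
	return s >> 1, ts
}

func main() {
	s0, t0 := stamp()
	s1, t1 := stamp()
	fmt.Println(s0, t0)
	fmt.Println(s1, t1) // s1 > s0 and t1 >= t0
}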
func lfstackpop(head *uint64) unsafe.Pointer {
	for {
		old := atomic.Load64(head)
		if old == 0 {
			return nil
		}
		node := lfstackUnpack(old)
		next := atomic.Load64(&node.next)
		if atomic.Cas64(head, old, next) {
			return unsafe.Pointer(node)
		}
	}
}
// Note: Called by runtime/pprof in addition to runtime code.
func tickspersecond() int64 {
	r := int64(atomic.Load64(&ticks.val))
	if r != 0 {
		return r
	}
	lock(&ticks.lock)
	r = int64(ticks.val)
	if r == 0 {
		t0 := nanotime()
		c0 := cputicks()
		usleep(100 * 1000)
		t1 := nanotime()
		c1 := cputicks()
		if t1 == t0 {
			t1++
		}
		r = (c1 - c0) * 1000 * 1000 * 1000 / (t1 - t0)
		if r == 0 {
			r++
		}
		atomic.Store64(&ticks.val, uint64(r))
	}
	unlock(&ticks.lock)
	return r
}
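// Example (not part of the original sources): ticksToNanoseconds is a
// hypothetical helper showing how a caller can use the calibrated rate.
// tickspersecond approximates cputicks ticks per second, so a tick delta
// converts to nanoseconds as delta * 1e9 / tickspersecond(). This is only
// an illustration of the arithmetic, not actual runtime/pprof code.
func ticksToNanoseconds(deltaTicks int64) int64 {
	return int64(float64(deltaTicks) * 1e9 / float64(tickspersecond()))
}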
// Variant of sync/atomic's TestUnaligned64:
func TestUnaligned64(t *testing.T) {
	// Unaligned 64-bit atomics on 32-bit systems are
	// a continual source of pain. Test that on 32-bit systems they crash
	// instead of failing silently.

	switch runtime.GOARCH {
	default:
		if unsafe.Sizeof(int(0)) != 4 {
			t.Skip("test only runs on 32-bit systems")
		}
	case "amd64p32":
		// amd64p32 can handle unaligned atomics.
		t.Skipf("test not needed on %v", runtime.GOARCH)
	}

	x := make([]uint32, 4)
	up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned
	p64 := (*int64)(unsafe.Pointer(&x[1]))   // misaligned

	shouldPanic(t, "Load64", func() { atomic.Load64(up64) })
	shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) })
	shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) })
	shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) })
	shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
	shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
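// shouldPanic is used above but not shown here. A minimal sketch of such a
// helper, assuming the usual defer/recover pattern; the real test helper
// may also inspect the panic value.
func shouldPanic(t *testing.T, name string, f func()) {
	defer func() {
		if recover() == nil {
			t.Errorf("%s did not panic", name)
		}
	}()
	f()
}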
func blocksampled(cycles int64) bool {
	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return false
	}
	return true
}
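// Example (not part of the original sources): blocksampled keeps every
// event that blocked for at least blockprofilerate cycles and keeps
// shorter events with probability roughly cycles/rate, so each retained
// short event stands for about one event per rate cycles of blocking.
// A standalone sketch of that decision with made-up numbers (sampled is
// a hypothetical stand-in using math/rand instead of fastrand):
package main

import (
	"fmt"
	"math/rand"
)

func sampled(cycles, rate int64) bool {
	if rate <= 0 {
		return false
	}
	if rate <= cycles {
		return true // long events are always kept
	}
	return rand.Int63()%rate <= cycles // short events kept with prob ~cycles/rate
}

func main() {
	const rate, cycles, trials = 1000, 100, 1000000
	kept := 0
	for i := 0; i < trials; i++ {
		if sampled(cycles, rate) {
			kept++
		}
	}
	// Expect roughly cycles/rate = 10% of events to be kept.
	fmt.Printf("kept %.1f%%\n", 100*float64(kept)/trials)
}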
func BenchmarkAtomicLoad64(b *testing.B) {
	var x uint64
	sink = &x
	for i := 0; i < b.N; i++ {
		_ = atomic.Load64(&x)
	}
}
//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate > 0 && int64(fastrand())%rate == 0 {
		saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
	}
}
func testAtomic64() {
	test_z64 = 42
	test_x64 = 0
	prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
	prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
	if atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 0 {
		throw("cas64 failed")
	}
	test_x64 = 42
	if !atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 42 || test_z64 != 1 {
		throw("cas64 failed")
	}
	if atomic.Load64(&test_z64) != 1 {
		throw("load64 failed")
	}
	atomic.Store64(&test_z64, (1<<40)+1)
	if atomic.Load64(&test_z64) != (1<<40)+1 {
		throw("store64 failed")
	}
	if atomic.Xadd64(&test_z64, (1<<40)+1) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Load64(&test_z64) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Xchg64(&test_z64, (3<<40)+3) != (2<<40)+2 {
		throw("xchg64 failed")
	}
	if atomic.Load64(&test_z64) != (3<<40)+3 {
		throw("xchg64 failed")
	}
}
func lfstackpush(head *uint64, node *lfnode) {
	node.pushcnt++
	new := lfstackPack(node, node.pushcnt)
	if node1 := lfstackUnpack(new); node1 != node {
		print("runtime: lfstackpush invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(new), " -> node=", node1, "\n")
		throw("lfstackpush")
	}
	for {
		old := atomic.Load64(head)
		node.next = old
		if atomic.Cas64(head, old, new) {
			break
		}
	}
}
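// Example (not part of the original sources): a standalone sketch of the
// same CAS retry loops as lfstackpush and lfstackpop, written as a plain
// Treiber stack over the public sync/atomic package. The names node, push,
// and pop are hypothetical. The real lfstack additionally packs a push
// counter into the head word (lfstackPack/lfstackUnpack) to defend against
// ABA and requires its nodes to live outside the garbage-collected heap;
// both of those concerns are omitted here.
package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct {
	next unsafe.Pointer // *node
	val  int
}

func push(head *unsafe.Pointer, n *node) {
	for {
		old := atomic.LoadPointer(head)
		n.next = old
		if atomic.CompareAndSwapPointer(head, old, unsafe.Pointer(n)) {
			return
		}
	}
}

func pop(head *unsafe.Pointer) *node {
	for {
		old := atomic.LoadPointer(head)
		if old == nil {
			return nil
		}
		n := (*node)(old)
		next := atomic.LoadPointer(&n.next)
		if atomic.CompareAndSwapPointer(head, old, next) {
			return n
		}
	}
}

func main() {
	var head unsafe.Pointer
	push(&head, &node{val: 1})
	push(&head, &node{val: 2})
	fmt.Println(pop(&head).val, pop(&head).val) // 2 1 (LIFO order)
}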
// deductSweepCredit deducts sweep credit for allocating a span of
// size spanBytes. This must be performed *before* the span is
// allocated to ensure the system has enough credit. If necessary, it
// performs sweeping to prevent going into debt. If the caller will
// also sweep pages (e.g., for a large allocation), it can pass a
// non-zero callerSweepPages to leave that many pages unswept.
//
// deductSweepCredit makes a worst-case assumption that all spanBytes
// bytes of the ultimately allocated span will be available for object
// allocation. The caller should call reimburseSweepCredit if that
// turns out not to be the case once the span is allocated.
//
// deductSweepCredit is the core of the "proportional sweep" system.
// It uses statistics gathered by the garbage collector to perform
// enough sweeping so that all pages are swept during the concurrent
// sweep phase between GC cycles.
//
// mheap_ must NOT be locked.
func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
	if mheap_.sweepPagesPerByte == 0 {
		// Proportional sweep is done or disabled.
		return
	}

	// Account for this span allocation.
	spanBytesAlloc := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes))

	// Fix debt if necessary.
	pagesOwed := int64(mheap_.sweepPagesPerByte * float64(spanBytesAlloc))
	for pagesOwed-int64(atomic.Load64(&mheap_.pagesSwept)) > int64(callerSweepPages) {
		if gosweepone() == ^uintptr(0) {
			mheap_.sweepPagesPerByte = 0
			break
		}
	}
}
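// Example (not part of the original sources): a standalone sketch of the
// proportional-sweep arithmetic above. All numbers are made up for
// illustration, not taken from a real heap; the debt grows in proportion
// to bytes of spans allocated, and sweeping must continue while the unpaid
// debt exceeds what the caller will sweep on its own.
package main

import "fmt"

func main() {
	const (
		sweepPagesPerByte = 0.001   // pages that must be swept per byte of spans allocated
		spanBytesAlloc    = 4 << 20 // bytes of spans allocated so far this cycle
		pagesSwept        = 3000    // pages already swept this cycle
		callerSweepPages  = 8       // pages the caller promises to sweep itself
	)
	// Same formula as deductSweepCredit.
	pagesOwed := int64(sweepPagesPerByte * float64(spanBytesAlloc))
	need := pagesOwed - pagesSwept - callerSweepPages
	if need < 0 {
		need = 0
	}
	fmt.Println("pages owed:", pagesOwed)    // 4194
	fmt.Println("pages to sweep now:", need) // 1186
}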
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}
	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return
	}
	gp := getg()
	var nstk int
	var stk [maxStack]uintptr
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, stk[:])
	} else {
		nstk = gcallers(gp.m.curg, skip, stk[:])
	}
	lock(&proflock)
	b := stkbucket(blockProfile, 0, stk[:nstk], true)
	b.bp().count++
	b.bp().cycles += cycles
	unlock(&proflock)
}
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					end = 0
					begin = end
					break
				}
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}
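// parfordo keeps each thread's remaining [begin, end) range packed into a
// single uint64 (low 32 bits: begin, high 32 bits: end) so the range can be
// read, advanced, and split with single atomic operations; in particular,
// atomic.Xadd64(mypos, 1) bumps begin without touching end. Hypothetical
// helpers (not in the original code) that make the packing explicit:
func packPos(begin, end uint32) uint64 {
	return uint64(begin) | uint64(end)<<32
}

func unpackPos(pos uint64) (begin, end uint32) {
	return uint32(pos), uint32(pos >> 32)
}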