// Variant of sync/atomic's TestUnaligned64:
func TestUnaligned64(t *testing.T) {
	// Unaligned 64-bit atomics on 32-bit systems are
	// a continual source of pain. Test that on 32-bit systems they crash
	// instead of failing silently.

	switch runtime.GOARCH {
	default:
		if unsafe.Sizeof(int(0)) != 4 {
			t.Skip("test only runs on 32-bit systems")
		}
	case "amd64p32":
		// amd64p32 can handle unaligned atomics.
		t.Skipf("test not needed on %v", runtime.GOARCH)
	}

	x := make([]uint32, 4)
	up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned
	p64 := (*int64)(unsafe.Pointer(&x[1]))   // misaligned

	shouldPanic(t, "Load64", func() { atomic.Load64(up64) })
	shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) })
	shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) })
	shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) })
	shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
	shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
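// The test above relies on a shouldPanic helper that is not shown here.
// A minimal sketch of such a helper, written in the same test package,
// assuming it only needs to verify that f panics (this is an assumption
// for illustration, not the runtime's actual helper):
func shouldPanic(t *testing.T, name string, f func()) {
	defer func() {
		if recover() == nil {
			t.Errorf("%s did not panic", name)
		}
	}()
	f()
}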
// tracestamp returns a consistent sequence number, time stamp pair
// for use in a trace. We need to make sure that time stamp ordering
// (assuming synchronized CPUs) and sequence ordering match.
// To do that, we increment traceseq, grab ticks, and increment traceseq again.
// We treat odd traceseq as a sign that another thread is in the middle
// of the sequence and spin until it is done.
// Not splitting stack to avoid preemption, just in case the call sites
// that used to call xadd64 and cputicks are sensitive to that.
//go:nosplit
func tracestamp() (seq uint64, ts int64) {
	seq = atomic.Load64(&traceseq)
	for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) {
		seq = atomic.Load64(&traceseq)
	}
	ts = cputicks()
	atomic.Store64(&traceseq, seq+2)
	return seq >> 1, ts
}
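// The odd/even protocol in tracestamp is essentially the writer side of a
// seqlock: an odd traceseq marks a stamp in progress, and the final store
// of seq+2 publishes the next even value. Because the counter is claimed
// before the clock is read and released only afterwards, a stamp with a
// larger sequence number cannot carry an earlier timestamp (assuming a
// monotonic, synchronized clock). The sketch below is a user-space
// analogue built on sync/atomic; the names and the use of time.Now in
// place of cputicks are assumptions for illustration, not runtime code.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

var seqctr uint64 // even: free, odd: a stamp is being taken

// stamp mirrors tracestamp: spin while seqctr is odd or the CAS to odd
// fails, read the clock, then release by storing the next even value.
func stamp() (seq uint64, ts int64) {
	s := atomic.LoadUint64(&seqctr)
	for s&1 != 0 || !atomic.CompareAndSwapUint64(&seqctr, s, s+1) {
		s = atomic.LoadUint64(&seqctr)
	}
	ts = time.Now().UnixNano()
	atomic.StoreUint64(&seqctr, s+2)
	return s >> 1, ts
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 3; j++ {
				seq, ts := stamp()
				fmt.Println("seq", seq, "ts", ts)
			}
		}()
	}
	wg.Wait()
}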
// testAtomic64 sanity-checks the runtime's 64-bit atomic operations
// (Cas64, Load64, Store64, Xadd64, Xchg64) against known values;
// any failure aborts via throw.
func testAtomic64() {
	test_z64 = 42
	test_x64 = 0
	prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
	prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
	if atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 0 {
		throw("cas64 failed")
	}
	test_x64 = 42
	if !atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 42 || test_z64 != 1 {
		throw("cas64 failed")
	}
	if atomic.Load64(&test_z64) != 1 {
		throw("load64 failed")
	}
	atomic.Store64(&test_z64, (1<<40)+1)
	if atomic.Load64(&test_z64) != (1<<40)+1 {
		throw("store64 failed")
	}
	if atomic.Xadd64(&test_z64, (1<<40)+1) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Load64(&test_z64) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	if atomic.Xchg64(&test_z64, (3<<40)+3) != (2<<40)+2 {
		throw("xchg64 failed")
	}
	if atomic.Load64(&test_z64) != (3<<40)+3 {
		throw("xchg64 failed")
	}
}
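// The constants like (1<<40)+1 in testAtomic64 put bits in the upper half
// of the 64-bit word, so an implementation that only handled the low
// 32 bits correctly would still produce the right low word but fail the
// full comparison. A tiny standalone illustration of that truncation
// (not runtime code):
package main

import "fmt"

func main() {
	v := uint64(1<<40) + 1
	fmt.Printf("full value:       %#x\n", v)         // 0x10000000001
	fmt.Printf("low 32 bits only: %#x\n", uint32(v)) // 0x1
}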
// lfstackpop pops the most recently pushed node off the lock-free stack
// whose packed head word is *head, or returns nil if the stack is empty.
func lfstackpop(head *uint64) unsafe.Pointer {
	for {
		old := atomic.Load64(head)
		if old == 0 {
			return nil
		}
		node := lfstackUnpack(old)
		next := atomic.Load64(&node.next)
		if atomic.Cas64(head, old, next) {
			return unsafe.Pointer(node)
		}
	}
}
// lfstackpush pushes node onto the lock-free stack whose packed head word
// is *head. The push count is folded into the packed value so that
// repushing the same node yields a distinct word (ABA defense).
func lfstackpush(head *uint64, node *lfnode) {
	node.pushcnt++
	new := lfstackPack(node, node.pushcnt)
	if node1 := lfstackUnpack(new); node1 != node {
		print("runtime: lfstackpush invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(new), " -> node=", node1, "\n")
		throw("lfstackpush")
	}
	for {
		old := atomic.Load64(head)
		node.next = old
		if atomic.Cas64(head, old, new) {
			break
		}
	}
}
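// lfstackPack and lfstackUnpack are not shown above. On 32-bit systems the
// runtime folds the node address and the push counter into one uint64 so
// that the head can be updated with a single Cas64. The sketch below
// demonstrates that layout with plain integers; the exact bit assignment
// (address in the high 32 bits, counter in the low 32 bits) is an
// assumption, and pack/unpackAddr/unpackCnt are illustrative names, not
// runtime APIs. The counter is what defeats the ABA problem: repushing a
// recycled node yields a different packed word, so a stale Cas64 in
// lfstackpop fails instead of corrupting the list.
package main

import "fmt"

func pack(addr, cnt uint32) uint64 { return uint64(addr)<<32 | uint64(cnt) }

func unpackAddr(val uint64) uint32 { return uint32(val >> 32) }
func unpackCnt(val uint64) uint32  { return uint32(val) }

func main() {
	// The same node address pushed twice produces distinct packed values
	// because the push count differs.
	first := pack(0x1000, 1)
	second := pack(0x1000, 2)
	fmt.Printf("first=%#x second=%#x equal=%v\n", first, second, first == second)
	fmt.Printf("addr=%#x cnt=%d\n", unpackAddr(second), unpackCnt(second))
}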
// parfordo runs the parallel for described by desc on the calling thread:
// it consumes its own iteration range, then steals half of a random
// victim's remaining range, and exits once all threads have declared
// themselves done.
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					end = 0
					begin = end
					break
				}
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}
exit:
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}
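// Each parfor thread's position word packs a half-open iteration range
// [begin, end) into one uint64 -- begin in the low 32 bits, end in the
// high 32 bits -- so that both the owner's Xadd64 and a thief's Cas64
// operate on the whole range atomically. The sketch below walks through
// the split a successful steal performs; pack and unpack are illustrative
// helpers, not runtime functions.
package main

import "fmt"

func pack(begin, end uint32) uint64 { return uint64(begin) | uint64(end)<<32 }

func unpack(pos uint64) (begin, end uint32) { return uint32(pos), uint32(pos >> 32) }

func main() {
	// A victim currently owns iterations [10, 30).
	pos := pack(10, 30)
	begin, end := unpack(pos)

	// The thief takes the upper half, exactly as parfordo does with
	// begin2 := begin + (end-begin)/2: the victim keeps [10, 20) and the
	// thief installs [20, 30) as its own range.
	begin2 := begin + (end-begin)/2
	victim := pack(begin, begin2)
	thief := pack(begin2, end)

	fmt.Println(unpack(victim)) // 10 20
	fmt.Println(unpack(thief))  // 20 30
}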