Esempio n. 1
0
// tracestamp hands out a (sequence number, time stamp) pair for use in a
// trace, such that sequence ordering and time stamp ordering agree
// (assuming synchronized CPUs).
//
// traceseq is bumped once before cputicks is read and once more after,
// so an odd value means some other thread is between the two bumps; in
// that case we spin until it is done.
//
// Not splitting stack to avoid preemption, just in case the call sites
// that used to call xadd64 and cputicks are sensitive to that.
//go:nosplit
func tracestamp() (seq uint64, ts int64) {
	for {
		seq = atomic.Load64(&traceseq)
		// Proceed only if nobody is mid-sequence (even value) and our
		// CAS is the one that makes the counter odd.
		if seq&1 == 0 && atomic.Cas64(&traceseq, seq, seq+1) {
			break
		}
	}
	ts = cputicks()
	// Second bump: publish the next even value so spinners can proceed.
	atomic.Store64(&traceseq, seq+2)
	return seq >> 1, ts
}
Esempio n. 2
0
// lfstackpop removes the top node from the lock-free stack whose packed
// head word is stored at *head and returns it as an unsafe.Pointer.
// It returns nil when the head word is 0 (empty stack).
//
// The head is re-read and the CAS retried until the swap from the
// observed head to that node's next word succeeds.
func lfstackpop(head *uint64) unsafe.Pointer {
	for top := atomic.Load64(head); top != 0; top = atomic.Load64(head) {
		n := lfstackUnpack(top)
		// The successor is loaded before the CAS is attempted.
		if atomic.Cas64(head, top, atomic.Load64(&n.next)) {
			return unsafe.Pointer(n)
		}
	}
	return nil
}
Esempio n. 3
0
// tickspersecond returns the number of cputicks per second.
//
// The value is measured once, by sampling nanotime and cputicks around a
// 100ms sleep, and cached in ticks.val; later calls return the cached
// value. The result is never 0.
//
// Note: Called by runtime/pprof in addition to runtime code.
func tickspersecond() int64 {
	// Fast path: already measured.
	if cached := int64(atomic.Load64(&ticks.val)); cached != 0 {
		return cached
	}

	const nsPerSecond = 1000 * 1000 * 1000

	lock(&ticks.lock)
	// Re-check under the lock: another caller may have filled it in
	// while we were waiting.
	rate := int64(ticks.val)
	if rate == 0 {
		startNs := nanotime()
		startTk := cputicks()
		usleep(100 * 1000)
		endNs := nanotime()
		endTk := cputicks()

		elapsedNs := endNs - startNs
		if elapsedNs == 0 {
			// Avoid dividing by zero if the clock did not advance.
			elapsedNs = 1
		}
		rate = (endTk - startTk) * nsPerSecond / elapsedNs
		if rate == 0 {
			// 0 means "not yet measured", so never store it.
			rate = 1
		}
		atomic.Store64(&ticks.val, uint64(rate))
	}
	unlock(&ticks.lock)
	return rate
}
Esempio n. 4
0
// Variant of sync/atomic's TestUnaligned64:
func TestUnaligned64(t *testing.T) {
	// Unaligned 64-bit atomics on 32-bit systems are
	// a continual source of pain. Test that on 32-bit systems they crash
	// instead of failing silently.

	switch runtime.GOARCH {
	default:
		if unsafe.Sizeof(int(0)) != 4 {
			t.Skip("test only runs on 32-bit systems")
		}
	case "amd64p32":
		// amd64p32 can handle unaligned atomics.
		t.Skipf("test not needed on %v", runtime.GOARCH)
	}

	x := make([]uint32, 4)
	up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned
	p64 := (*int64)(unsafe.Pointer(&x[1]))   // misaligned

	shouldPanic(t, "Load64", func() { atomic.Load64(up64) })
	shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) })
	shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) })
	shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) })
	shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
	shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
Esempio n. 5
0
File: mprof.go Progetto: achanda/go
// blocksampled reports whether a blocking event that lasted the given
// number of cycles should be recorded, based on blockprofilerate.
//
// A rate <= 0 never samples. Events with cycles >= rate are always
// sampled. Shorter events are sampled only when fastrand() modulo rate
// is <= cycles.
func blocksampled(cycles int64) bool {
	rate := int64(atomic.Load64(&blockprofilerate))
	// fastrand is consulted only when rate is positive and exceeds cycles.
	return rate > 0 && (rate <= cycles || int64(fastrand())%rate <= cycles)
}
Esempio n. 6
0
// BenchmarkAtomicLoad64 measures atomic.Load64 on a local uint64,
// performing b.N loads.
func BenchmarkAtomicLoad64(b *testing.B) {
	var word uint64
	// Publish the address through the package-level sink (presumably so
	// the compiler cannot optimize the variable away).
	sink = &word
	for remaining := b.N; remaining > 0; remaining-- {
		_ = atomic.Load64(&word)
	}
}
Esempio n. 7
0
File: mprof.go Progetto: achanda/go
// mutexevent records a mutex profiling sample for an event that lasted
// the given number of cycles; negative values are clamped to 0. skip is
// forwarded (plus one, for this frame) to saveblockevent.
//
// Nothing is recorded when mutexprofilerate is <= 0; otherwise the event
// is recorded only when fastrand() modulo the rate is 0.
//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate <= 0 || int64(fastrand())%rate != 0 {
		return
	}
	saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
}
Esempio n. 8
0
// testAtomic64 is a self-check of the 64-bit atomic primitives (Cas64,
// Load64, Store64, Xadd64, Xchg64) using the globals test_z64 and
// test_x64. It also issues the four prefetch variants on test_z64.
// Any unexpected result is reported via throw.
func testAtomic64() {
	test_z64 = 42
	test_x64 = 0
	// Issue each prefetch flavor on the address of test_z64.
	prefetcht0(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht1(uintptr(unsafe.Pointer(&test_z64)))
	prefetcht2(uintptr(unsafe.Pointer(&test_z64)))
	prefetchnta(uintptr(unsafe.Pointer(&test_z64)))
	// test_z64 is 42 but the expected old value is 0, so this CAS must
	// report failure.
	if atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	// The comparand must not have been modified by the failed CAS.
	if test_x64 != 0 {
		throw("cas64 failed")
	}
	// Now the expected old value matches, so the CAS must succeed and
	// leave test_z64 == 1 without touching test_x64.
	test_x64 = 42
	if !atomic.Cas64(&test_z64, test_x64, 1) {
		throw("cas64 failed")
	}
	if test_x64 != 42 || test_z64 != 1 {
		throw("cas64 failed")
	}
	if atomic.Load64(&test_z64) != 1 {
		throw("load64 failed")
	}
	// The remaining checks use values above 1<<32 so that both 32-bit
	// halves of the word are non-zero.
	atomic.Store64(&test_z64, (1<<40)+1)
	if atomic.Load64(&test_z64) != (1<<40)+1 {
		throw("store64 failed")
	}
	// Xadd64 must return the new (post-add) value...
	if atomic.Xadd64(&test_z64, (1<<40)+1) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	// ...and that value must also be what is stored.
	if atomic.Load64(&test_z64) != (2<<40)+2 {
		throw("xadd64 failed")
	}
	// Xchg64 must return the previous value...
	if atomic.Xchg64(&test_z64, (3<<40)+3) != (2<<40)+2 {
		throw("xchg64 failed")
	}
	// ...and leave the new value in place.
	if atomic.Load64(&test_z64) != (3<<40)+3 {
		throw("xchg64 failed")
	}
}
Esempio n. 9
0
// lfstackpush pushes node onto the lock-free stack whose packed head
// word is stored at *head.
//
// The node's pushcnt is incremented and packed together with the node
// pointer into a single uint64. If that word does not unpack back to
// the same node, the function prints a diagnostic and throws. The push
// itself retries a CAS on *head until it succeeds.
func lfstackpush(head *uint64, node *lfnode) {
	node.pushcnt++
	packed := lfstackPack(node, node.pushcnt)
	if unpacked := lfstackUnpack(packed); unpacked != node {
		print("runtime: lfstackpush invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(packed), " -> node=", unpacked, "\n")
		throw("lfstackpush")
	}
	for {
		// Link the node in front of the currently observed head, then
		// try to install it; on a lost race, re-read and retry.
		cur := atomic.Load64(head)
		node.next = cur
		if atomic.Cas64(head, cur, packed) {
			return
		}
	}
}
Esempio n. 10
0
// deductSweepCredit charges sweep credit for the allocation of a span of
// spanBytes bytes. It must run *before* the span is allocated so that
// the system is known to have enough credit. When the allocation would
// put the system in debt, it sweeps until the debt is paid off. A caller
// that will itself sweep pages (e.g., for a large allocation) can pass a
// non-zero callerSweepPages to leave that many pages unswept.
//
// The charge assumes the worst case: that every one of the spanBytes
// bytes of the span eventually allocated is usable for objects. If that
// proves false once the span exists, the caller should call
// reimburseSweepCredit.
//
// This function is the core of the "proportional sweep" system. It uses
// statistics gathered by the garbage collector to do enough sweeping
// that all pages are swept during the concurrent sweep phase between GC
// cycles.
//
// mheap_ must NOT be locked.
func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
	if mheap_.sweepPagesPerByte == 0 {
		// Proportional sweep is done or disabled.
		return
	}

	// Account for this span allocation.
	allocated := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes))

	// Number of pages that should have been swept by now, and how far
	// behind the caller lets us stay.
	target := int64(mheap_.sweepPagesPerByte * float64(allocated))
	allowance := int64(callerSweepPages)

	// Fix debt if necessary.
	for {
		debt := target - int64(atomic.Load64(&mheap_.pagesSwept))
		if debt <= allowance {
			return
		}
		if gosweepone() == ^uintptr(0) {
			// gosweepone reported its all-ones sentinel: turn
			// proportional sweeping off and stop.
			mheap_.sweepPagesPerByte = 0
			return
		}
	}
}
Esempio n. 11
0
File: mprof.go Progetto: dushmis/go
// blockevent records a blocking event of the given duration (in cycles)
// in the block profile, subject to sampling by blockprofilerate.
// Non-positive durations are treated as 1 cycle. skip is passed through
// to the stack walker.
//
// Sampling: nothing is recorded when the rate is <= 0. Events lasting at
// least rate cycles are always recorded; shorter ones are dropped when
// fastrand() modulo rate exceeds cycles.
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}

	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 {
		return
	}
	if rate > cycles && int64(fastrand())%rate > cycles {
		return
	}

	// Collect the stack: use gcallers on m.curg when it is set and
	// differs from the current g, otherwise walk our own stack.
	var (
		stk  [maxStack]uintptr
		nstk int
	)
	gp := getg()
	if curg := gp.m.curg; curg != nil && curg != gp {
		nstk = gcallers(curg, skip, stk[:])
	} else {
		nstk = callers(skip, stk[:])
	}

	// Update the bucket for this stack under proflock.
	lock(&proflock)
	bucket := stkbucket(blockProfile, 0, stk[:nstk], true)
	bucket.bp().count++
	bucket.bp().cycles += cycles
	unlock(&proflock)
}
Esempio n. 12
0
File: parfor.go Progetto: hAhmadz/go
// parfordo executes the calling thread's part of the parallel for
// described by desc. Each participating thread calls it once; the call
// claims a thread index from desc.thrseq and throws if more than
// desc.nthr threads show up.
//
// Each thread's pending work is a half-open index range packed into one
// uint64 (desc.thr[i].pos): the low 32 bits hold the next index to run
// and the high 32 bits hold the end of the range. A thread first drains
// its own range, then tries to steal half of a randomly chosen victim's
// range. Termination is detected through desc.done, which counts idle
// threads. If desc.wait is false, a thread gives up after enough failed
// steal attempts instead of waiting for the others.
//
// On exit, the per-thread statistics counters are folded into desc and
// reset to zero.
func parfordo(desc *parfor) {
	// Obtain 0-based thread index.
	tid := atomic.Xadd(&desc.thrseq, 1) - 1
	if tid >= desc.nthr {
		print("tid=", tid, " nthr=", desc.nthr, "\n")
		throw("parfor: invalid tid")
	}

	// If single-threaded, just execute the for serially.
	body := desc.body
	if desc.nthr == 1 {
		for i := uint32(0); i < desc.cnt; i++ {
			body(desc, i)
		}
		return
	}

	me := &desc.thr[tid]
	mypos := &me.pos
	for {
		for {
			// While there is local work,
			// bump low index and execute the iteration.
			// Xadd64 returns the incremented word, so the index we
			// just claimed is the low half minus one.
			pos := atomic.Xadd64(mypos, 1)
			begin := uint32(pos) - 1
			end := uint32(pos >> 32)
			if begin < end {
				body(desc, begin)
				continue
			}
			break
		}

		// Out of work, need to steal something.
		idle := false
		for try := uint32(0); ; try++ {
			// If we don't see any work for long enough,
			// increment the done counter...
			if try > desc.nthr*4 && !idle {
				idle = true
				atomic.Xadd(&desc.done, 1)
			}

			// ...if all threads have incremented the counter,
			// we are done.
			// extra accounts for this thread when it has not yet
			// counted itself in desc.done.
			extra := uint32(0)
			if !idle {
				extra = 1
			}
			if desc.done+extra == desc.nthr {
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			}

			// Choose a random victim for stealing.
			// Picking from nthr-1 slots and skipping over tid ensures
			// we never select ourselves.
			var begin, end uint32
			victim := fastrand1() % (desc.nthr - 1)
			if victim >= tid {
				victim++
			}
			victimpos := &desc.thr[victim].pos
			for {
				// See if it has any work.
				pos := atomic.Load64(victimpos)
				begin = uint32(pos)
				end = uint32(pos >> 32)
				if begin+1 >= end {
					// At most one iteration left: nothing to steal.
					// Leave begin == end == 0 to signal failure below.
					end = 0
					begin = end
					break
				}
				// The victim has work, so we are no longer idle; undo
				// our contribution to the done counter before stealing.
				if idle {
					atomic.Xadd(&desc.done, -1)
					idle = false
				}
				// Split the victim's range at its midpoint: the victim
				// keeps [begin, begin2) and we take [begin2, end).
				begin2 := begin + (end-begin)/2
				newpos := uint64(begin) | uint64(begin2)<<32
				if atomic.Cas64(victimpos, pos, newpos) {
					begin = begin2
					break
				}
				// CAS lost a race with the victim or another thief;
				// reload and retry.
			}
			if begin < end {
				// Has successfully stolen some work.
				if idle {
					throw("parfor: should not be idle")
				}
				// Install the stolen range as our own and go back to
				// the local work loop.
				atomic.Store64(mypos, uint64(begin)|uint64(end)<<32)
				me.nsteal++
				me.nstealcnt += uint64(end) - uint64(begin)
				break
			}

			// Backoff.
			// Escalates with the number of failed attempts: retry
			// immediately, then procyield, then osyield, then usleep.
			if try < desc.nthr {
				// nothing
			} else if try < 4*desc.nthr {
				me.nprocyield++
				procyield(20)
			} else if !desc.wait {
				// If a caller asked not to wait for the others, exit now
				// (assume that most work is already done at this point).
				if !idle {
					atomic.Xadd(&desc.done, 1)
				}
				goto exit
			} else if try < 6*desc.nthr {
				me.nosyield++
				osyield()
			} else {
				me.nsleep++
				usleep(1)
			}
		}
	}

exit:
	// Fold this thread's statistics into the shared totals, then reset
	// the per-thread counters.
	atomic.Xadd64(&desc.nsteal, int64(me.nsteal))
	atomic.Xadd64(&desc.nstealcnt, int64(me.nstealcnt))
	atomic.Xadd64(&desc.nprocyield, int64(me.nprocyield))
	atomic.Xadd64(&desc.nosyield, int64(me.nosyield))
	atomic.Xadd64(&desc.nsleep, int64(me.nsleep))
	me.nsteal = 0
	me.nstealcnt = 0
	me.nprocyield = 0
	me.nosyield = 0
	me.nsleep = 0
}