Example #1
0
// main runs the demag convolution on a 1x64x128 mesh whose input slice is
// set to (1, 1, 1) everywhere, prints the three output components read at
// index (N0/2, N1/2, N2/2), and passes them to check against -1, 0 and 0.
func main() {
	cuda.Init()

	size0, size1, size2 := 1, 64, 128
	cell := 1.
	mesh := data.NewMesh(size0, size1, size2, cell/2, cell*2, cell)

	// Input slice and convolution are created in the same order as before.
	input := cuda.NewSlice(3, mesh)
	demag := cuda.NewDemag(mesh)
	cuda.Memset(input, 1, 1, 1)

	field := cuda.NewSlice(3, mesh)
	sat := 1.
	volume := data.NilSlice(1, mesh)
	demag.Exec(field, input, volume, sat)

	// Copy the result to the host and look up the centre-index element of
	// each of the three components.
	host := field.HostCopy()
	vecs := host.Vectors()
	i0, i1, i2 := size0/2, size1/2, size2/2

	bx := vecs[0][i0][i1][i2]
	by := vecs[1][i0][i1][i2]
	bz := vecs[2][i0][i1][i2]

	fmt.Println("demag tensor:", bx, by, bz)
	check(bx, -1)
	check(by, 0)
	check(bz, 0)
	fmt.Println("OK")
}
Example #2
0
// hostbuf returns a host buffer for storing output before being flushed to disk.
// It takes one from the hBuf pool when one is available, or allocates a new one
// when the pool is empty and fewer than maxOutputQueLen buffers are already in
// use. Once that limit has been reached it waits for a buffer to arrive on hBuf;
// previously this case fell through to panic("unreachable"), which was in fact
// reachable.
//
// NOTE(review): the blocking receive assumes some other goroutine sends used
// buffers back on hBuf -- confirm against the code that flushes output.
func hostbuf() *data.Slice {
	// Fast path: reuse a pooled buffer without blocking.
	select {
	case b := <-hBuf:
		cuda.Memset(b, 0, 0, 0) // not strictly needed
		return b
	default:
	}

	// Pool is empty: allocate while still under the limit.
	if nOutBuf < maxOutputQueLen {
		nOutBuf++
		return cuda.NewUnifiedSlice(3, mesh)
	}

	// Limit reached: wait until a buffer is handed back.
	b := <-hBuf
	cuda.Memset(b, 0, 0, 0) // not strictly needed, kept consistent with the fast path
	return b
}
Example #3
0
// memset obtains the slice from b.Write, hands it to cuda.Memset together
// with val, and then signals completion through b.WriteDone.
func (b *buffered) memset(val ...float32) {
	cuda.Memset(b.Write(), val...)
	b.WriteDone()
}