Beispiel #1
0
// TestCpy round-trips a host slice through two slices created with this
// package's NewSlice and back into a host slice, and checks that the data
// arrives unchanged.
func TestCpy(t *testing.T) {
	N0, N1, N2 := 2, 4, 32
	N := N0 * N1 * N2
	mesh := data.NewMesh(N0, N1, N2, 1, 1, 1)

	// Reference data: every element holds its own linear index.
	h1 := make([]float32, N)
	for i := range h1 {
		h1[i] = float32(i)
	}
	hs := data.SliceFromList([][]float32{h1}, mesh)

	// First hop: host list -> slice from NewSlice.
	d := NewSlice(1, mesh)
	defer d.Free() // release the buffer even when the test fails early
	data.Copy(d, hs)

	// Second hop: between two slices from NewSlice.
	d2 := NewSlice(1, mesh)
	defer d2.Free()
	data.Copy(d2, d)

	// Third hop: back into a slice from data.NewSlice so it can be inspected.
	h2 := data.NewSlice(1, mesh)
	data.Copy(h2, d2)

	res := h2.Host()[0]
	if len(res) != len(h1) {
		t.Fatalf("copied slice has %d elements, want %d", len(res), len(h1))
	}
	for i := range res {
		if res[i] != h1[i] {
			// Stop at the first mismatch and say where it is.
			t.Fatalf("element %d: got %v, want %v", i, res[i], h1[i])
		}
	}
}
Beispiel #2
0
// initFFTKern2D initializes the GPU FFT kernel for the 2D case.
// Only the non-redundant parts are stored on the GPU: the components with
// j >= i, each truncated along index 1 to half the size plus one.
func (c *DemagConvolution) initFFTKern2D() {
	// Size of the transformed kernel, with index 2 halved.
	size := fftR2COutputSizeFloats(c.kernSize)
	size[2] /= 2
	c.fftKernSize = size

	// Size of the part that is actually kept.
	half := size
	half[1] = half[1]/2 + 1

	plan := c.fwPlan
	out := c.fftCBuf[0]
	in := c.fftRBuf[0]

	// Scratch buffer for the scaled result, reused for every component.
	scratch := data.NewSlice(1, data.NewMesh(half[0], half[1], half[2], 1, 1, 1))

	// upper triangular part
	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ {
			kern := c.kern[i][j]
			if kern == nil { // ignore 0's
				continue
			}
			data.Copy(in, kern)
			plan.Exec(in, out)
			spectrum := out.Slice(0, prod(half)*2)
			norm := 1 / float32(plan.InputLen())
			scaleRealParts(scratch, spectrum, norm)
			c.gpuFFTKern[i][j] = GPUCopy(scratch)
		}
	}
}
Beispiel #3
0
// testConvolution is a self-test for c. It runs the convolution on a test
// input, repeats the calculation with bruteConv, and compares the two
// results. The largest absolute deviation is logged; if it exceeds
// CONV_TOLERANCE the program is terminated through log.Fatal.
func testConvolution(c *DemagConvolution, mesh *data.Mesh) {
	input := data.NewSlice(3, mesh)
	initConvTestInput(input.Vectors())

	// Convolution under test; input and output share the same slice.
	dev := NewSlice(3, mesh)
	defer dev.Free()
	data.Copy(dev, input)
	c.Exec(dev, dev, data.NilSlice(1, mesh), 1)
	result := dev.HostCopy()

	// Reference result from the brute-force implementation.
	reference := data.NewSlice(3, mesh)
	bruteConv(input.Vectors(), reference.Vectors(), c.kern)

	// Largest absolute difference over all components and elements.
	got, want := result.Host(), reference.Host()
	var worst float32
	for comp := range got {
		for i, g := range got[comp] {
			if diff := abs(g - want[comp][i]); diff > worst {
				worst = diff
			}
		}
	}

	if worst > CONV_TOLERANCE {
		log.Fatal("convolution self-test error: ", worst)
	}
	log.Println("convolution self-test error:", worst)
}
Beispiel #4
0
// toGPU copies list into a freshly allocated one-component slice on a
// 1 x 1 x len(list) mesh and returns that slice.
func toGPU(list []float32) *data.Slice {
	mesh := data.NewMesh(1, 1, len(list), 1, 1, 1)
	src := data.SliceFromList([][]float32{list}, mesh)
	dst := NewSlice(1, mesh)
	data.Copy(dst, src)
	return dst
}
Beispiel #5
0
// runDownloader continuously takes download tasks from dlQue and queues a
// corresponding save task on saveQue for each of them.
// The downloader queue is not buffered and we want to use at most one GPU
// output buffer. Only one PCIe download at a time can proceed anyway.
// When dlQue is closed and drained, saveQue is closed as well.
func runDownloader() {
	cuda.LockThread()

	for task := range dlQue {
		buf := hostbuf()
		data.Copy(buf, task.output) // output is already locked
		task.unlockOutput()

		// Sends buf to hBuf when called.
		recycle := func() { hBuf <- buf }
		saveQue <- saveTask{task.fname, buf, task.time, recycle}
	}
	close(saveQue)
}
Beispiel #6
0
// Take one time step.
// Stage 1 advances y by a full step using the torque at the start of the
// step and saves that torque in e.dy0. Stage 2 advances the time,
// re-evaluates the torque at the advanced state and, unless e.Fixdt is set,
// estimates the error from the difference between both torques. The step is
// then either corrected and accepted, or undone, and in both cases the time
// step is adapted through e.adaptDt.
func (e *Heun) Step() {
	dy0 := e.dy0
	dt := float32(e.Dt_si * e.dt_mul) // could check here if it is in float32 ranges
	util.Assert(dt > 0)

	// stage 1
	{
		Dy := e.torqueFn(true) // <- hook here for output, always good step output
		dy := Dy.Read()
		y := e.y.Write()
		Madd2(y, y, dy, 1, dt) // y = y + dt * dy
		e.y.WriteDone()
		data.Copy(dy0, dy) // keep the stage-1 torque; stage 2 needs it
		Dy.ReadDone()
	}

	// stage 2
	{
		*e.time += e.Dt_si // torque below is evaluated at the advanced time
		Dy := e.torqueFn(false)
		dy := Dy.Read()

		// With fixed dt no error is estimated and err stays 0.
		err := 0.0
		if !e.Fixdt {
			err = MaxVecDiff(dy0, dy) * float64(dt)
			solverCheckErr(err)
		}

		y := e.y.Write()
		if err < e.MaxErr || e.Dt_si <= e.Mindt { // mindt check to avoid infinite loop
			// step OK
			// Presumably y = y + 0.5*dt*dy - 0.5*dt*dy0, by analogy with Madd2.
			// y already contains + dt*dy0 from stage 1, so the net update
			// would be dt times the average of both torques.
			Madd3(y, y, dy, dy0, 1, 0.5*dt, -0.5*dt)
			e.postStep(y)
			e.NSteps++
			// NOTE(review): err can be 0 here (always when e.Fixdt is set),
			// which makes e.MaxErr/err a division by zero; confirm that
			// adaptDt tolerates the resulting value.
			e.adaptDt(math.Pow(e.MaxErr/err, 1./2.))
			e.LastErr = err
		} else {
			// undo bad step
			util.Assert(!e.Fixdt)
			*e.time -= e.Dt_si       // restore the time advanced above
			Madd2(y, y, dy0, 1, -dt) // y = y - dt * dy0: revert the stage-1 update
			e.NUndone++
			e.adaptDt(math.Pow(e.MaxErr/err, 1./3.))
		}
		e.y.WriteDone()
		Dy.ReadDone()
	}
}
Beispiel #7
0
// main runs the demag convolution on a 16 x 16 x 16 mesh in which all cells
// whose centre lies inside a sphere of radius N2/2 around the mesh centre are
// set to (1, 2, 3). It then checks that the result at cell
// (N0/2, N1/2, N2/2) equals (-1/3, -2/3, -3/3).
func main() {
	cuda.Init()

	N0, N1, N2 := 16, 16, 16
	cell := 1.
	mesh := data.NewMesh(N0, N1, N2, cell, cell, cell)

	m := cuda.NewSlice(3, mesh)
	conv := cuda.NewDemag(mesh)

	// centre returns the coordinate of the centre of cell idx along an axis
	// with n cells, measured from the middle of the mesh.
	centre := func(idx, n int) float64 {
		return cell * (float64(idx) + 0.5 - float64(n)/2)
	}

	mhost := m.HostCopy()
	vec := mhost.Vectors()
	r := float64(N2) / 2
	for i := 0; i < N0; i++ {
		x := centre(i, N0)
		for j := 0; j < N1; j++ {
			y := centre(j, N1)
			for k := 0; k < N2; k++ {
				z := centre(k, N2)
				if x*x+y*y+z*z >= r*r {
					continue // outside the sphere: leave the cell untouched
				}
				vec[0][i][j][k] = 1
				vec[1][i][j][k] = 2
				vec[2][i][j][k] = 3
			}
		}
	}

	data.Copy(m, mhost)

	B := cuda.NewSlice(3, mesh)
	conv.Exec(B, m, data.NilSlice(1, mesh), 1)
	out := B.HostCopy()

	// Probe the result at the cell with indices (N0/2, N1/2, N2/2).
	ci, cj, ck := N0/2, N1/2, N2/2
	bx := out.Vectors()[0][ci][cj][ck]
	by := out.Vectors()[1][ci][cj][ck]
	bz := out.Vectors()[2][ci][cj][ck]

	fmt.Println("demag tensor:", bx, by/2, bz/3)
	check(bx, -1./3.)
	check(by, -2./3.)
	check(bz, -3./3.)
	fmt.Println("OK")
}
Beispiel #8
0
// Upload copies src into the slice returned by m.Write, then calls
// m.WriteDone.
func (m *buffered) Upload(src *data.Slice) {
	dst := m.Write()
	data.Copy(dst, src)
	m.WriteDone()
}
Beispiel #9
0
// GPUCopy returns a copy of in, allocated on GPU.
// The copy has the same number of components and the same mesh as in.
func GPUCopy(in *data.Slice) *data.Slice {
	ncomp, mesh := in.NComp(), in.Mesh()
	dup := NewSlice(ncomp, mesh)
	data.Copy(dup, in)
	return dup
}