Exemple #1
0
func TestSliceSlice(t *testing.T) {
	LockThread()
	N0, N1, N2 := 1, 10, 10
	c := 1e-6
	m := data.NewMesh(N0, N1, N2, c, c, c)
	a := NewUnifiedSlice(3, m)
	h := a.Host()
	h[1][21] = 42
	b := a.Slice(20, 30)
	if b.Len() != 30-20 {
		t.Fail()
	}
	if b.NComp() != a.NComp() {
		t.Fail()
	}
	if b.Host()[1][1] != 42 {
		t.Fail()
	}
	if *a.Mesh() != *b.Mesh() {
		t.Fail()
	}
	if a.MemType() != b.MemType() {
		t.Fail()
	}
}
Exemple #2
0
func TestCpy(t *testing.T) {
	N0, N1, N2 := 2, 4, 32
	N := N0 * N1 * N2
	mesh := data.NewMesh(N0, N1, N2, 1, 1, 1)

	h1 := make([]float32, N)
	for i := range h1 {
		h1[i] = float32(i)
	}
	hs := data.SliceFromList([][]float32{h1}, mesh)

	d := NewSlice(1, mesh)
	data.Copy(d, hs)

	d2 := NewSlice(1, mesh)
	data.Copy(d2, d)

	h2 := data.NewSlice(1, mesh)
	data.Copy(h2, d2)

	res := h2.Host()[0]
	for i := range res {
		if res[i] != h1[i] {
			t.Fail()
		}
	}
}
Exemple #3
0
// Initialize GPU FFT kernel for 2D.
// Only the non-redundant parts are stored on the GPU.
func (c *DemagConvolution) initFFTKern2D() {
	padded := c.kernSize
	ffted := fftR2COutputSizeFloats(padded)
	realsize := ffted
	realsize[2] /= 2
	c.fftKernSize = realsize
	halfkern := realsize
	halfkern[1] = halfkern[1]/2 + 1
	fwPlan := c.fwPlan
	output := c.fftCBuf[0]
	input := c.fftRBuf[0]

	// upper triangular part
	fftKern := data.NewSlice(1, data.NewMesh(halfkern[0], halfkern[1], halfkern[2], 1, 1, 1))
	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ {
			if c.kern[i][j] != nil { // ignore 0's
				data.Copy(input, c.kern[i][j])
				fwPlan.Exec(input, output)
				scaleRealParts(fftKern, output.Slice(0, prod(halfkern)*2), 1/float32(fwPlan.InputLen()))
				c.gpuFFTKern[i][j] = GPUCopy(fftKern)
			}
		}
	}
}
Exemple #4
0
func TestSlice(t *testing.T) {
	N0, N1, N2 := 2, 4, 8
	c := 1e-6
	m := data.NewMesh(N0, N1, N2, c, c, c)
	N := N0 * N1 * N2

	a := NewSlice(3, m)
	defer a.Free()
	Memset(a, 1, 2, 3)

	if a.GPUAccess() == false {
		t.Fail()
	}
	if a.Len() != N {
		t.Fail()
	}
	if a.NComp() != 3 {
		t.Fail()
	}

	b := a.Comp(1)
	if b.GPUAccess() == false {
		t.Error("b.GPUAccess", b.GPUAccess())
	}
	if b.Len() != N {
		t.Error("b.Len", b.Len())
	}
	if b.NComp() != 1 {
		t.Error("b.NComp", b.NComp())
	}
	if *b.Mesh() != *a.Mesh() {
		t.Fail()
	}
}
Exemple #5
0
func toGPU(list []float32) *data.Slice {
	mesh := data.NewMesh(1, 1, len(list), 1, 1, 1)
	h := data.SliceFromList([][]float32{list}, mesh)
	d := NewSlice(1, mesh)
	data.Copy(d, h)
	return d
}
Exemple #6
0
func main() {
	cuda.Init()

	N0, N1, N2 := 1, 64, 128
	c := 1.
	mesh := data.NewMesh(N0, N1, N2, c/2, c*2, c)

	m := cuda.NewSlice(3, mesh)
	conv := cuda.NewDemag(mesh)
	cuda.Memset(m, 1, 1, 1)

	B := cuda.NewSlice(3, mesh)
	Bsat := 1.
	vol := data.NilSlice(1, mesh)
	conv.Exec(B, m, vol, Bsat)
	out := B.HostCopy()

	bx := out.Vectors()[0][N0/2][N1/2][N2/2]
	by := out.Vectors()[1][N0/2][N1/2][N2/2]
	bz := out.Vectors()[2][N0/2][N1/2][N2/2]
	fmt.Println("demag tensor:", bx, by, bz)
	check(bx, -1)
	check(by, 0)
	check(bz, 0)
	fmt.Println("OK")
}
Exemple #7
0
// Set the simulation mesh to Nx x Ny x Nz cells of given size.
// Can be set only once at the beginning of the simulation.
func SetMesh(Nx, Ny, Nz int, cellSizeX, cellSizeY, cellSizeZ float64) {
	if mesh != nil {
		log.Fatal("mesh already set")
	}
	if Nx <= 1 {
		log.Fatal("mesh size X should be > 1, have: ", Nx)
	}
	mesh = data.NewMesh(Nz, Ny, Nx, cellSizeZ, cellSizeY, cellSizeX)
	log.Println("set mesh:", mesh.UserString())
	initialize()
}
Exemple #8
0
func TestSliceFree(t *testing.T) {
	LockThread()
	N0, N1, N2 := 128, 1024, 1024
	c := 1e-6
	m := data.NewMesh(N0, N1, N2, c, c, c)
	N := 17
	// not freeing would attempt to allocate 17GB.
	for i := 0; i < N; i++ {
		a := NewSlice(2, m)
		a.Free()
	}
	a := NewSlice(2, m)
	a.Free()
	a.Free() // test double-free
}
Exemple #9
0
func TestSliceHost(t *testing.T) {
	LockThread()
	N0, N1, N2 := 1, 10, 10
	c := 1e-6
	m := data.NewMesh(N0, N1, N2, c, c, c)
	a := NewUnifiedSlice(3, m)
	defer a.Free()

	b := a.Host()
	if b[0][0] != 0 || b[1][42] != 0 || b[2][99] != 0 {
		t.Fail()
	}

	Memset(a, 1, 2, 3)
	b = a.Host()
	if b[0][0] != 1 || b[1][42] != 2 || b[2][99] != 3 {
		t.Fail()
	}
}
Exemple #10
0
func main() {
	cuda.Init()

	N0, N1, N2 := 16, 16, 16
	c := 1.
	mesh := data.NewMesh(N0, N1, N2, c, c, c)

	m := cuda.NewSlice(3, mesh)
	conv := cuda.NewDemag(mesh)

	mhost := m.HostCopy()
	m_ := mhost.Vectors()
	r := float64(N2) / 2
	for i := 0; i < N0; i++ {
		x := c * (float64(i) + 0.5 - float64(N0)/2)
		for j := 0; j < N1; j++ {
			y := c * (float64(j) + 0.5 - float64(N1)/2)
			for k := 0; k < N2; k++ {
				z := c * (float64(k) + 0.5 - float64(N2)/2)
				if x*x+y*y+z*z < r*r {
					m_[0][i][j][k] = 1
					m_[1][i][j][k] = 2
					m_[2][i][j][k] = 3
				}
			}
		}
	}

	data.Copy(m, mhost)

	B := cuda.NewSlice(3, mesh)
	conv.Exec(B, m, data.NilSlice(1, mesh), 1)
	out := B.HostCopy()

	bx := out.Vectors()[0][N0/2][N1/2][N2/2]
	by := out.Vectors()[1][N0/2][N1/2][N2/2]
	bz := out.Vectors()[2][N0/2][N1/2][N2/2]
	fmt.Println("demag tensor:", bx, by/2, bz/3)
	check(bx, -1./3.)
	check(by, -2./3.)
	check(bz, -3./3.)
	fmt.Println("OK")
}
Exemple #11
0
// Calculates the magnetostatic kernel by brute-force integration
// of magnetic charges over the faces and averages over cell volumes.
// Mesh should NOT yet be zero-padded.
func BruteKernel(mesh *data.Mesh, accuracy float64) (kernel [3][3]*data.Slice) {

	{ // Kernel mesh is 2x larger than input, instead in case of PBC
		pbc := mesh.PBC()
		util.Argument(pbc == [3]int{0, 0, 0}) // PBC not supported yet
		sz := padSize(mesh.Size(), pbc)
		cs := mesh.CellSize()
		mesh = data.NewMesh(sz[0], sz[1], sz[2], cs[0], cs[1], cs[2], pbc[:]...)
	}

	// Shorthand
	size := mesh.Size()
	cellsize := mesh.CellSize()
	periodic := mesh.PBC()
	log.Println("calculating demag kernel:", "accuracy:", accuracy, ", size:", size[0], "x", size[1], "x", size[2])

	// Sanity check
	{
		util.Assert(size[0] > 0 && size[1] > 1 && size[2] > 1)
		util.Assert(cellsize[0] > 0 && cellsize[1] > 0 && cellsize[2] > 0)
		util.Assert(periodic[0] >= 0 && periodic[1] >= 0 && periodic[2] >= 0)
		util.Assert(accuracy > 0)
		// TODO: in case of PBC, this will not be met:
		util.Assert(size[1]%2 == 0 && size[2]%2 == 0)
		if size[0] > 1 {
			util.Assert(size[0]%2 == 0)
		}
	}

	// Allocate only upper diagonal part. The rest is symmetric due to reciprocity.
	var array [3][3][][][]float32
	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ {
			kernel[i][j] = data.NewSlice(1, mesh)
			array[i][j] = kernel[i][j].Scalars()
		}
	}

	// Field (destination) loop ranges
	x1, x2 := -(size[X]-1)/2, size[X]/2-1
	y1, y2 := -(size[Y]-1)/2, size[Y]/2-1
	z1, z2 := -(size[Z]-1)/2, size[Z]/2-1
	// support for 2D simulations (thickness 1)
	if size[X] == 1 && periodic[X] == 0 {
		x2 = 0
	}
	{ // Repeat for PBC:
		x1 *= (periodic[X] + 1)
		x2 *= (periodic[X] + 1)
		y1 *= (periodic[Y] + 1)
		y2 *= (periodic[Y] + 1)
		z1 *= (periodic[Z] + 1)
		z2 *= (periodic[Z] + 1)
	}

	// smallest cell dimension is our typical length scale
	L := cellsize[X]
	if cellsize[Y] < L {
		L = cellsize[Y]
	}
	if cellsize[Z] < L {
		L = cellsize[Z]
	}

	// Start brute integration
	// 9 nested loops, does that stress you out?
	// Fortunately, the 5 inner ones usually loop over just one element.
	// It might be nice to get rid of that branching though.
	var (
		R, R2  [3]float64 // field and source cell center positions
		pole   [3]float64 // position of point charge on the surface
		points int        // counts used integration points
	)
	for s := 0; s < 3; s++ { // source index Ksdxyz
		u, v, w := s, (s+1)%3, (s+2)%3 // u = direction of source (s), v & w are the orthogonal directions

		for x := x1; x <= x2; x++ { // in each dimension, go from -(size-1)/2 to size/2 -1, wrapped.
			xw := wrap(x, size[X])
			R[X] = float64(x) * cellsize[X]

			for y := y1; y <= y2; y++ {
				yw := wrap(y, size[Y])
				R[Y] = float64(y) * cellsize[Y]

				for z := z1; z <= z2; z++ {
					zw := wrap(z, size[Z])
					R[Z] = float64(z) * cellsize[Z]

					// choose number of integration points depending on how far we are from source.
					dx, dy, dz := delta(x)*cellsize[X], delta(y)*cellsize[Y], delta(z)*cellsize[Z]
					d := math.Sqrt(dx*dx + dy*dy + dz*dz)
					if d == 0 {
						d = L
					}
					maxSize := d / accuracy // maximum acceptable integration size
					nv := int(math.Max(cellsize[v]/maxSize, 1) + 0.5)
					nw := int(math.Max(cellsize[w]/maxSize, 1) + 0.5)
					nx := int(math.Max(cellsize[X]/maxSize, 1) + 0.5)
					ny := int(math.Max(cellsize[Y]/maxSize, 1) + 0.5)
					nz := int(math.Max(cellsize[Z]/maxSize, 1) + 0.5)
					// Stagger source and destination grids.
					// Massively improves accuracy. Could play with variations.
					// See note.
					nv *= 2
					nw *= 2

					util.Assert(nv > 0 && nw > 0 && nx > 0 && ny > 0 && nz > 0)

					scale := 1 / float64(nv*nw*nx*ny*nz)
					surface := cellsize[v] * cellsize[w] // the two directions perpendicular to direction s
					charge := surface * scale
					pu1 := cellsize[u] / 2. // positive pole center
					pu2 := -pu1             // negative pole center

					// Do surface integral over source cell, accumulate  in B
					var B [3]float64
					for i := 0; i < nv; i++ {
						pv := -(cellsize[v] / 2.) + cellsize[v]/float64(2*nv) + float64(i)*(cellsize[v]/float64(nv))
						pole[v] = pv
						for j := 0; j < nw; j++ {
							pw := -(cellsize[w] / 2.) + cellsize[w]/float64(2*nw) + float64(j)*(cellsize[w]/float64(nw))
							pole[w] = pw

							// Do volume integral over destination cell
							for α := 0; α < nx; α++ {
								rx := R[X] - cellsize[X]/2 + cellsize[X]/float64(2*nx) + (cellsize[X]/float64(nx))*float64(α)

								for β := 0; β < ny; β++ {
									ry := R[Y] - cellsize[Y]/2 + cellsize[Y]/float64(2*ny) + (cellsize[Y]/float64(ny))*float64(β)

									for γ := 0; γ < nz; γ++ {
										rz := R[Z] - cellsize[Z]/2 + cellsize[Z]/float64(2*nz) + (cellsize[Z]/float64(nz))*float64(γ)
										points++

										pole[u] = pu1
										R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
										r := math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
										qr := charge / (4 * math.Pi * r * r * r)
										bx := R2[X] * qr
										by := R2[Y] * qr
										bz := R2[Z] * qr

										pole[u] = pu2
										R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
										r = math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
										qr = -charge / (4 * math.Pi * r * r * r)
										B[X] += (bx + R2[X]*qr) // addition ordered for accuracy
										B[Y] += (by + R2[Y]*qr)
										B[Z] += (bz + R2[Z]*qr)

									}
								}
							}
						}
					}
					for d := s; d < 3; d++ { // destination index Ksdxyz
						// TODO: for PBC, need to add here
						array[s][d][xw][yw][zw] = float32(B[d])
					}
				}
			}
		}
	}
	log.Println("kernel used", points, "integration points")
	// for 2D these elements are zero:
	if size[0] == 1 {
		kernel[0][1] = nil
		kernel[0][2] = nil
	}
	// make result symmetric for tools that expect it so.
	kernel[1][0] = kernel[0][1]
	kernel[2][0] = kernel[0][2]
	kernel[2][1] = kernel[1][2]
	return kernel
}
Exemple #12
0
// TODO: spill if does not fit GPU?
func makeFloats(size [3]int) *data.Slice {
	m := data.NewMesh(size[0], size[1], size[2], 1, 1, 1)
	return NewSlice(1, m)
}