Example #1
// Read any OOMMF file, autodetect OVF1/OVF2 format
func Read(in io.Reader) (s *data.Slice, meta data.Meta, err error) {
	//in := fullReader{bufio.NewReader(in_)}
	info := readHeader(in)

	n := info.Size
	c := info.StepSize
	if c == [3]float64{0, 0, 0} {
		c = [3]float64{1, 1, 1} // default (presumably unitless) cell size
	}
	data_ := data.NewSlice(info.NComp, n)

	format := strings.ToLower(info.Format)
	ovf := info.OVF

	switch {
	default:
		panic(fmt.Sprint("unknown format: OVF", ovf, " ", format))
	case format == "text":
		readOVFDataText(in, data_)
	case format == "binary 4" && ovf == 1:
		readOVF1DataBinary4(in, data_)
	case format == "binary 8" && ovf == 1:
		readOVF1DataBinary8(in, data_)
	case format == "binary 4" && ovf == 2:
		readOVF2DataBinary4(in, data_)
	case format == "binary 8" && ovf == 2:
		readOVF2DataBinary8(in, data_)
	}

	return data_, data.Meta{Name: info.Title, Time: info.TotalTime, Unit: info.ValueUnit, CellSize: c}, nil
}
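
A minimal usage sketch for the reader above, assuming it lives in the mumax3 oommf package (import path github.com/mumax/3/oommf); the file name is hypothetical:

package main

import (
	"fmt"
	"os"

	"github.com/mumax/3/oommf" // assumed import path for the package defining Read
)

func main() {
	f, err := os.Open("m000000.ovf") // hypothetical OVF input file
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Read autodetects OVF1/OVF2 and the text/binary encoding.
	s, meta, err := oommf.Read(f)
	if err != nil {
		panic(err)
	}
	fmt.Println("read", meta.Name, "-", s.NComp(), "components, size", s.Size())
}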
Example #2
func TestCpy(t *testing.T) {
	Init(0)
	N0, N1, N2 := 2, 4, 32
	N := N0 * N1 * N2
	mesh := [3]int{N0, N1, N2}

	h1 := make([]float32, N)
	for i := range h1 {
		h1[i] = float32(i)
	}
	hs := sliceFromList([][]float32{h1}, mesh)

	d := NewSlice(1, mesh)
	data.Copy(d, hs)

	d2 := NewSlice(1, mesh)
	data.Copy(d2, d)

	h2 := data.NewSlice(1, mesh)
	data.Copy(h2, d2)

	res := h2.Host()[0]
	for i := range res {
		if res[i] != h1[i] {
			t.Fail()
		}
	}
}
Example #3
// Compares FFT-accelerated convolution against brute-force on sparse data.
// This is not really needed but very quickly uncovers newly introduced bugs.
func testConvolution(c *DemagConvolution, PBC [3]int, realKern [3][3]*data.Slice) {
	if PBC != [3]int{0, 0, 0} || prod(c.inputSize) > 512*512 {
		// the brute-force method does not work for pbc,
		// and for large simulations it gets just too slow.
		util.Log("skipping convolution self-test")
		return
	}
	//fmt.Print("convolution test ")
	inhost := data.NewSlice(3, c.inputSize)
	initConvTestInput(inhost.Vectors())
	gpu := NewSlice(3, c.inputSize)
	defer gpu.Free()
	data.Copy(gpu, inhost)

	regions := NewBytes(prod(c.inputSize))
	defer regions.Free()
	Bsat := NewSlice(1, [3]int{1, 1, 256})
	defer Bsat.Free()
	Memset(Bsat, 1)
	BsatLUT := LUTPtr(Bsat.DevPtr(0))

	vol := data.NilSlice(1, c.inputSize)
	c.Exec(gpu, gpu, vol, BsatLUT, regions)

	output := gpu.HostCopy()

	brute := data.NewSlice(3, c.inputSize)
	bruteConv(inhost.Vectors(), brute.Vectors(), realKern)

	a, b := output.Host(), brute.Host()
	err := float32(0)
	for c := range a {
		for i := range a[c] {
			if fabs(a[c][i]-b[c][i]) > err {
				err = fabs(a[c][i] - b[c][i])
			}
		}
	}
	if err > CONV_TOLERANCE {
		util.Fatal("convolution self-test tolerance: ", err, " FAIL")
	}
}
Example #4
// Compares FFT-accelerated convolution against brute-force on sparse data.
// This is not really needed but very quickly uncovers newly introduced bugs.
func testConvolution(c *DemagConvolution, PBC [3]int, realKern [3][3]*data.Slice) {
	if PBC != [3]int{0, 0, 0} {
		// the brute-force method does not work for pbc.
		util.Log("skipping convolution self-test for PBC")
		return
	}
	util.Log("//convolution self-test...")
	inhost := data.NewSlice(3, c.inputSize)
	initConvTestInput(inhost.Vectors())
	gpu := NewSlice(3, c.inputSize)
	defer gpu.Free()
	data.Copy(gpu, inhost)

	Msat := NewSlice(1, [3]int{1, 1, 256})
	defer Msat.Free()
	Memset(Msat, 1)

	vol := data.NilSlice(1, c.inputSize)
	c.Exec(gpu, gpu, vol, ToMSlice(Msat))

	output := gpu.HostCopy()

	brute := data.NewSlice(3, c.inputSize)
	bruteConv(inhost.Vectors(), brute.Vectors(), realKern)

	a, b := output.Host(), brute.Host()
	err := float32(0)
	for c := range a {
		for i := range a[c] {
			if fabs(a[c][i]-b[c][i]) > err {
				err = fabs(a[c][i] - b[c][i])
			}
		}
	}
	if err > CONV_TOLERANCE {
		util.Fatal("convolution self-test tolerance: ", err, " FAIL")
	}
}
Example #5
func output3D(D [][]complex64, reduce func(complex64) float32, size [3]int, prefix string, deltaF float32) {
	const NCOMP = 1
	for i := 0; i < len(D)/2; i++ {
		d := D[i]
		MHz := int((float32(i) * deltaF) / 1e6)
		fname := fmt.Sprintf("%sf%06dMHz.ovf", prefix, MHz)
		slice := data.NewSlice(NCOMP, size)
		doReduce(slice.Host()[0], d, reduce)
		meta := data.Meta{}
		log.Println(fname)
		f := httpfs.MustCreate(fname)
		oommf.WriteOVF2(f, slice, meta, "binary")
		f.Close()
	}
}
Example #6
func compensateLRSurfaceCharges(m *data.Mesh, mxLeft, mxRight float64, bsat float64) *data.Slice {
	h := data.NewSlice(3, m.Size())
	H := h.Vectors()
	world := m.WorldSize()
	cell := m.CellSize()
	size := m.Size()
	q := cell[Z] * cell[Y] * bsat
	q1 := q * mxLeft
	q2 := q * (-mxRight)

	prog, maxProg := 0, (size[Z]+1)*(size[Y]+1)

	// surface loop (source)
	for I := 0; I < size[Z]; I++ {
		for J := 0; J < size[Y]; J++ {
			prog++
			util.Progress(prog, maxProg, "removing surface charges")

			y := (float64(J) + 0.5) * cell[Y]
			z := (float64(I) + 0.5) * cell[Z]
			source1 := [3]float64{0, y, z}        // left surface source
			source2 := [3]float64{world[X], y, z} // right surface source

			// volume loop (destination)
			for iz := range H[0] {
				for iy := range H[0][iz] {
					for ix := range H[0][iz][iy] {

						dst := [3]float64{ // destination coordinate
							(float64(ix) + 0.5) * cell[X],
							(float64(iy) + 0.5) * cell[Y],
							(float64(iz) + 0.5) * cell[Z]}

						h1 := hfield(q1, source1, dst)
						h2 := hfield(q2, source2, dst)

						// add this surface charge's field to the grand total
						for c := 0; c < 3; c++ {
							H[c][iz][iy][ix] += float32(h1[c] + h2[c])
						}
					}
				}
			}
		}
	}
	return h
}
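
The hfield helper used above is not shown in this example. A plausible minimal sketch, assuming it returns the stray field of a magnetic point charge q placed at source and evaluated at dst, i.e. the same q/(4πr³) form used in the kernel integration of Example #8 (requires the standard math package):

// Sketch of the (not shown) hfield helper: field of a magnetic point charge q
// located at source, evaluated at dst. The actual helper may differ.
func hfield(q float64, source, dst [3]float64) [3]float64 {
	var r [3]float64
	for c := 0; c < 3; c++ {
		r[c] = dst[c] - source[c]
	}
	d := math.Sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2])
	scale := q / (4 * math.Pi * d * d * d)
	var h [3]float64
	for c := 0; c < 3; c++ {
		h[c] = r[c] * scale
	}
	return h
}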
Example #7
func main() {

	cuda.Init(0)
	defer Close()

	Nx := 512
	Ny := 128
	Nz := 1

	cellsize := 5.0e-9
	SetGridSize(Nx, Ny, Nz)
	thickness := 40e-9
	//width := float64(Ny) * cellsize
	length := float64(Nx) * cellsize
	SetCellSize(cellsize, cellsize, thickness/float64(Nz))

	mask := data.NewSlice(3, Mesh().Size())
	wireX := -length * 0.45
	//wireY := 0.0
	wireZ := thickness * 5.0

	for h := 0; h < 10; h++ {
		for i := 0; i < Nx; i++ {
			for j := 0; j < Ny; j++ {
				r := Index2Coord(i, j, 0)
				r = r.Sub(Vector(wireX+float64(h)*cellsize, r.Y(), wireZ))

				B := Vector(0, 0, 0)
				current := Vector(0, 1, 0)
				B = r.Cross(current).Mul(Mu0 / (2 * math.Pi * math.Pow(r.Len(), 2)))

				mask.Set(0, i, j, 0, B.X())
				mask.Set(1, i, j, 0, B.Y())
				mask.Set(2, i, j, 0, B.Z())
			}
		}
	}

	f, err := os.OpenFile("antenna.ovf", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		panic(err)
	}
	defer f.Close()
	oommf.WriteOVF2(f, mask, data.Meta{}, "binary 4")
}
Example #8
// Calculates the magnetostatic kernel by brute-force integration
// of magnetic charges over the faces and averages over cell volumes.
func CalcDemagKernel(inputSize, pbc [3]int, cellsize [3]float64, accuracy float64) (kernel [3][3]*data.Slice) {

	// Add zero-padding in non-PBC directions
	size := padSize(inputSize, pbc)

	// Sanity check
	{
		util.Assert(size[Z] > 0 && size[Y] > 0 && size[X] > 0)
		util.Assert(cellsize[X] > 0 && cellsize[Y] > 0 && cellsize[Z] > 0)
		util.Assert(pbc[X] >= 0 && pbc[Y] >= 0 && pbc[Z] >= 0)
		util.Assert(accuracy > 0)
	}

	// Allocate only the upper triangular part. The rest is symmetric due to reciprocity.
	var array [3][3][][][]float32
	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ {
			kernel[i][j] = data.NewSlice(1, size)
			array[i][j] = kernel[i][j].Scalars()
		}
	}

	// Field (destination) loop ranges
	r1, r2 := kernelRanges(size, pbc)

	// smallest cell dimension is our typical length scale
	L := cellsize[X]
	{
		if cellsize[Y] < L {
			L = cellsize[Y]
		}
		if cellsize[Z] < L {
			L = cellsize[Z]
		}
	}

	progress, progmax := 0, (1+(r2[Y]-r1[Y]))*(1+(r2[Z]-r1[Z])) // progress bar
	done := make(chan struct{}, 3)                              // parallel calculation of one component done?

	// Start brute integration
	// 9 nested loops, does that stress you out?
	// Fortunately, the 5 inner ones usually loop over just one element.

	for s := 0; s < 3; s++ { // source index Ksdxyz (parallelized over)
		go func(s int) {
			u, v, w := s, (s+1)%3, (s+2)%3 // u = direction of source (s), v & w are the orthogonal directions
			var (
				R, R2  [3]float64 // field and source cell center positions
				pole   [3]float64 // position of point charge on the surface
				points int        // counts used integration points
			)

			for z := r1[Z]; z <= r2[Z]; z++ {
				zw := wrap(z, size[Z])
				// skip one half, reconstruct from symmetry later
				// check on wrapped index instead of loop range so it also works for PBC
				if zw > size[Z]/2 {
					if s == 0 {
						progress += (1 + (r2[Y] - r1[Y]))
					}
					continue
				}
				R[Z] = float64(z) * cellsize[Z]

				for y := r1[Y]; y <= r2[Y]; y++ {

					if s == 0 { // show progress of only one component
						progress++
						util.Progress(progress, progmax, "Calculating demag kernel")
					}

					yw := wrap(y, size[Y])
					if yw > size[Y]/2 {
						continue
					}
					R[Y] = float64(y) * cellsize[Y]

					for x := r1[X]; x <= r2[X]; x++ {
						xw := wrap(x, size[X])
						if xw > size[X]/2 {
							continue
						}
						R[X] = float64(x) * cellsize[X]

						// choose number of integration points depending on how far we are from source.
						dx, dy, dz := delta(x)*cellsize[X], delta(y)*cellsize[Y], delta(z)*cellsize[Z]
						d := math.Sqrt(dx*dx + dy*dy + dz*dz)
						if d == 0 {
							d = L
						}
						maxSize := d / accuracy // maximum acceptable integration size

						nv := int(math.Max(cellsize[v]/maxSize, 1) + 0.5)
						nw := int(math.Max(cellsize[w]/maxSize, 1) + 0.5)
						nx := int(math.Max(cellsize[X]/maxSize, 1) + 0.5)
						ny := int(math.Max(cellsize[Y]/maxSize, 1) + 0.5)
						nz := int(math.Max(cellsize[Z]/maxSize, 1) + 0.5)
						// Stagger source and destination grids.
						// Massively improves accuracy, see note.
						nv *= 2
						nw *= 2

						util.Assert(nv > 0 && nw > 0 && nx > 0 && ny > 0 && nz > 0)

						scale := 1 / float64(nv*nw*nx*ny*nz)
						surface := cellsize[v] * cellsize[w] // the two directions perpendicular to direction s
						charge := surface * scale
						pu1 := cellsize[u] / 2. // positive pole center
						pu2 := -pu1             // negative pole center

						// Do surface integral over the source cell, accumulate in B
						var B [3]float64
						for i := 0; i < nv; i++ {
							pv := -(cellsize[v] / 2.) + cellsize[v]/float64(2*nv) + float64(i)*(cellsize[v]/float64(nv))
							pole[v] = pv
							for j := 0; j < nw; j++ {
								pw := -(cellsize[w] / 2.) + cellsize[w]/float64(2*nw) + float64(j)*(cellsize[w]/float64(nw))
								pole[w] = pw

								// Do volume integral over destination cell
								for α := 0; α < nx; α++ {
									rx := R[X] - cellsize[X]/2 + cellsize[X]/float64(2*nx) + (cellsize[X]/float64(nx))*float64(α)

									for β := 0; β < ny; β++ {
										ry := R[Y] - cellsize[Y]/2 + cellsize[Y]/float64(2*ny) + (cellsize[Y]/float64(ny))*float64(β)

										for γ := 0; γ < nz; γ++ {
											rz := R[Z] - cellsize[Z]/2 + cellsize[Z]/float64(2*nz) + (cellsize[Z]/float64(nz))*float64(γ)
											points++

											pole[u] = pu1
											R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
											r := math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
											qr := charge / (4 * math.Pi * r * r * r)
											bx := R2[X] * qr
											by := R2[Y] * qr
											bz := R2[Z] * qr

											pole[u] = pu2
											R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
											r = math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
											qr = -charge / (4 * math.Pi * r * r * r)
											B[X] += (bx + R2[X]*qr) // addition ordered for accuracy
											B[Y] += (by + R2[Y]*qr)
											B[Z] += (bz + R2[Z]*qr)

										}
									}
								}
							}
						}
						for d := s; d < 3; d++ { // destination index Ksdxyz
							array[s][d][zw][yw][xw] += float32(B[d]) // += needed in case of PBC
						}
					}
				}
			}
			done <- struct{}{} // signal that the parallel computation of this component is done
		}(s)
	}
	// wait for all 3 components to finish
	<-done
	<-done
	<-done

	// Reconstruct skipped parts from symmetry (X)
	for z := 0; z < size[Z]; z++ {
		for y := 0; y < size[Y]; y++ {
			for x := size[X]/2 + 1; x < size[X]; x++ {
				x2 := size[X] - x
				array[X][X][z][y][x] = array[X][X][z][y][x2]
				array[X][Y][z][y][x] = -array[X][Y][z][y][x2]
				array[X][Z][z][y][x] = -array[X][Z][z][y][x2]
				array[Y][Y][z][y][x] = array[Y][Y][z][y][x2]
				array[Y][Z][z][y][x] = array[Y][Z][z][y][x2]
				array[Z][Z][z][y][x] = array[Z][Z][z][y][x2]
			}
		}
	}

	// Reconstruct skipped parts from symmetry (Y)
	for z := 0; z < size[Z]; z++ {
		for y := size[Y]/2 + 1; y < size[Y]; y++ {
			y2 := size[Y] - y
			for x := 0; x < size[X]; x++ {
				array[X][X][z][y][x] = array[X][X][z][y2][x]
				array[X][Y][z][y][x] = -array[X][Y][z][y2][x]
				array[X][Z][z][y][x] = array[X][Z][z][y2][x]
				array[Y][Y][z][y][x] = array[Y][Y][z][y2][x]
				array[Y][Z][z][y][x] = -array[Y][Z][z][y2][x]
				array[Z][Z][z][y][x] = array[Z][Z][z][y2][x]

			}
		}
	}

	// Reconstruct skipped parts from symmetry (Z)
	for z := size[Z]/2 + 1; z < size[Z]; z++ {
		z2 := size[Z] - z
		for y := 0; y < size[Y]; y++ {
			for x := 0; x < size[X]; x++ {
				array[X][X][z][y][x] = array[X][X][z2][y][x]
				array[X][Y][z][y][x] = array[X][Y][z2][y][x]
				array[X][Z][z][y][x] = -array[X][Z][z2][y][x]
				array[Y][Y][z][y][x] = array[Y][Y][z2][y][x]
				array[Y][Z][z][y][x] = -array[Y][Z][z2][y][x]
				array[Z][Z][z][y][x] = array[Z][Z][z2][y][x]
			}
		}
	}

	// for 2D these elements are zero:
	if size[Z] == 1 {
		kernel[X][Z] = nil
		kernel[Y][Z] = nil
	}
	// make result symmetric for tools that expect it so.
	kernel[Y][X] = kernel[X][Y]
	kernel[Z][X] = kernel[X][Z]
	kernel[Z][Y] = kernel[Y][Z]
	return kernel
}
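
A minimal call sketch for CalcDemagKernel, written as if from inside the same package; the grid, cell size and accuracy values are illustrative, not taken from the example:

// Sketch: demag kernel for a 64x64 2D grid without PBC (illustrative parameters).
func exampleDemagKernel() [3][3]*data.Slice {
	inputSize := [3]int{64, 64, 1}           // Nx, Ny, Nz
	pbc := [3]int{0, 0, 0}                   // no periodic images
	cellsize := [3]float64{5e-9, 5e-9, 3e-9} // cell dimensions in m
	const accuracy = 6.0                     // integration accuracy parameter
	// Only the upper triangle is computed; kernel[Y][X] aliases kernel[X][Y], etc.
	return CalcDemagKernel(inputSize, pbc, cellsize, accuracy)
}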
Example #9
func (c *DemagConvolution) init(realKern [3][3]*data.Slice) {
	// init device buffers
	// 2D re-uses fftCBuf[X] as fftCBuf[Z]; 3D needs all 3 buffers.
	nc := fftR2COutputSizeFloats(c.realKernSize)
	c.fftCBuf[X] = NewSlice(1, nc)
	c.fftCBuf[Y] = NewSlice(1, nc)
	if c.is2D() {
		c.fftCBuf[Z] = c.fftCBuf[X]
	} else {
		c.fftCBuf[Z] = NewSlice(1, nc)
	}
	// Real buffer shares storage with Complex buffer
	for i := 0; i < 3; i++ {
		c.fftRBuf[i] = data.SliceFromPtrs(c.realKernSize, data.GPUMemory, []unsafe.Pointer{c.fftCBuf[i].DevPtr(0)})
	}

	// init FFT plans
	c.fwPlan = newFFT3DR2C(c.realKernSize[X], c.realKernSize[Y], c.realKernSize[Z])
	c.bwPlan = newFFT3DC2R(c.realKernSize[X], c.realKernSize[Y], c.realKernSize[Z])

	// init FFT kernel

	// logical size of FFT(kernel): store real parts only
	c.fftKernLogicSize = fftR2COutputSizeFloats(c.realKernSize)
	util.Assert(c.fftKernLogicSize[X]%2 == 0)
	c.fftKernLogicSize[X] /= 2

	// physical size of FFT(kernel): store only non-redundant part exploiting Y, Z mirror symmetry
	// X mirror symmetry already exploited: FFT(kernel) is purely real.
	physKSize := [3]int{c.fftKernLogicSize[X], c.fftKernLogicSize[Y]/2 + 1, c.fftKernLogicSize[Z]/2 + 1}

	output := c.fftCBuf[0]
	input := c.fftRBuf[0]
	fftKern := data.NewSlice(1, physKSize)
	kfull := data.NewSlice(1, output.Size()) // not yet exploiting symmetry
	kfulls := kfull.Scalars()
	kCSize := physKSize
	kCSize[X] *= 2                     // size of kernel after removing Y,Z redundant parts, but still complex
	kCmplx := data.NewSlice(1, kCSize) // not yet exploiting X symmetry
	kc := kCmplx.Scalars()

	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ { // upper triangular part
			if realKern[i][j] != nil { // ignore 0's
				// FW FFT
				data.Copy(input, realKern[i][j])
				c.fwPlan.ExecAsync(input, output)
				data.Copy(kfull, output)

				// extract non-redundant part (Y,Z symmetry)
				for iz := 0; iz < kCSize[Z]; iz++ {
					for iy := 0; iy < kCSize[Y]; iy++ {
						for ix := 0; ix < kCSize[X]; ix++ {
							kc[iz][iy][ix] = kfulls[iz][iy][ix]
						}
					}
				}

				// extract real parts (X symmetry)
				scaleRealParts(fftKern, kCmplx, 1/float32(c.fwPlan.InputLen()))
				c.kern[i][j] = GPUCopy(fftKern)
			}
		}
	}
}
Example #10
func NewScalarMask(Nx, Ny, Nz int) *data.Slice {
	return data.NewSlice(1, [3]int{Nx, Ny, Nz})
}
Example #11
func NewVectorMask(Nx, Ny, Nz int) *data.Slice {
	return data.NewSlice(3, [3]int{Nx, Ny, Nz})
}
Example #12
// Returns a new slice (3D array) with the given number of components and size.
func NewSlice(ncomp, Nx, Ny, Nz int) *data.Slice {
	return data.NewSlice(ncomp, [3]int{Nx, Ny, Nz})
}
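
A short usage sketch for the wrappers in Examples #10-#12, filling a vector mask with the Set accessor also seen in Example #7; the values are purely illustrative:

// Sketch: build a uniform 3-component mask using the wrapper above.
func exampleVectorMask() *data.Slice {
	mask := NewVectorMask(64, 32, 1)
	for j := 0; j < 32; j++ {
		for i := 0; i < 64; i++ {
			mask.Set(0, i, j, 0, 1.0) // x-component
			mask.Set(1, i, j, 0, 0.0) // y-component
			mask.Set(2, i, j, 0, 0.0) // z-component
		}
	}
	return mask
}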
Example #13
func (geometry *geom) setGeom(s Shape) {
	SetBusy(true)
	defer SetBusy(false)

	if s == nil {
		// TODO: would be nice not to save volume if entirely filled
		s = universe
	}

	geometry.shape = s
	if geometry.Gpu().IsNil() {
		geometry.buffer = cuda.NewSlice(1, geometry.Mesh().Size())
	}

	host := data.NewSlice(1, geometry.Gpu().Size())
	array := host.Scalars()
	V := host
	v := array
	n := geometry.Mesh().Size()
	c := geometry.Mesh().CellSize()
	cx, cy, cz := c[X], c[Y], c[Z]

	progress, progmax := 0, n[Y]*n[Z]

	var ok bool
	for iz := 0; iz < n[Z]; iz++ {
		for iy := 0; iy < n[Y]; iy++ {

			progress++
			util.Progress(progress, progmax, "Initializing geometry")

			for ix := 0; ix < n[X]; ix++ {

				r := Index2Coord(ix, iy, iz)
				x0, y0, z0 := r[X], r[Y], r[Z]

				// check if center and all vertices lie inside or all outside
				allIn, allOut := true, true
				if s(x0, y0, z0) {
					allOut = false
				} else {
					allIn = false
				}

				if edgeSmooth != 0 { // center is sufficient if we're not really smoothing
					for _, Δx := range []float64{-cx / 2, cx / 2} {
						for _, Δy := range []float64{-cy / 2, cy / 2} {
							for _, Δz := range []float64{-cz / 2, cz / 2} {
								if s(x0+Δx, y0+Δy, z0+Δz) { // inside
									allOut = false
								} else {
									allIn = false
								}
							}
						}
					}
				}

				switch {
				case allIn:
					v[iz][iy][ix] = 1
					ok = true
				case allOut:
					v[iz][iy][ix] = 0
				default:
					v[iz][iy][ix] = geometry.cellVolume(ix, iy, iz)
					ok = ok || (v[iz][iy][ix] != 0)
				}
			}
		}
	}

	if !ok {
		util.Fatal("SetGeom: geometry completely empty")
	}

	data.Copy(geometry.buffer, V)

	// M inside geom but previously outside needs to be re-inited
	needupload := false
	geomlist := host.Host()[0]
	mhost := M.Buffer().HostCopy()
	m := mhost.Host()
	rng := rand.New(rand.NewSource(0))
	for i := range m[0] {
		if geomlist[i] != 0 {
			mx, my, mz := m[X][i], m[Y][i], m[Z][i]
			if mx == 0 && my == 0 && mz == 0 {
				needupload = true
				rnd := randomDir(rng)
				m[X][i], m[Y][i], m[Z][i] = float32(rnd[X]), float32(rnd[Y]), float32(rnd[Z])
			}
		}
	}
	if needupload {
		data.Copy(M.Buffer(), mhost)
	}

	M.normalize() // removes m outside vol
}
Example #14
// rescale and download the quantity; the result ends up in ren.imgBuf (rescaleBuf is the GPU-side intermediate)
func (ren *render) download() {
	InjectAndWait(func() {
		if ren.quant == nil { // not yet set, default = m
			ren.quant = &M
		}
		quant := ren.quant
		size := quant.Mesh().Size()

		// don't slice out of bounds
		renderLayer := ren.layer
		if renderLayer >= size[Z] {
			renderLayer = size[Z] - 1
		}
		if renderLayer < 0 {
			renderLayer = 0
		}

		// scaling sanity check
		if ren.scale < 1 {
			ren.scale = 1
		}
		if ren.scale > maxScale {
			ren.scale = maxScale
		}
		// Don't render too large images or we choke
		for size[X]/ren.scale > maxImgSize {
			ren.scale++
		}
		for size[Y]/ren.scale > maxImgSize {
			ren.scale++
		}

		for i := range size {
			size[i] /= ren.scale
			if size[i] == 0 {
				size[i] = 1
			}
		}
		size[Z] = 1 // selects one layer

		// make sure buffers are there
		if ren.imgBuf.Size() != size {
			ren.imgBuf = data.NewSlice(3, size) // always 3-comp, may be re-used
		}
		buf, r := quant.Slice()
		if r {
			defer cuda.Recycle(buf)
		}
		if !buf.GPUAccess() {
			ren.imgBuf = Download(quant) // fallback (no zoom)
			return
		}
		// make sure buffers are there (in CUDA context)
		if ren.rescaleBuf.Size() != size {
			ren.rescaleBuf.Free()
			ren.rescaleBuf = cuda.NewSlice(1, size)
		}
		for c := 0; c < quant.NComp(); c++ {
			cuda.Resize(ren.rescaleBuf, buf.Comp(c), renderLayer)
			data.Copy(ren.imgBuf.Comp(c), ren.rescaleBuf)
		}
	})
}
Example #15
// Kernel for the vertical derivative of the force on an MFM tip due to mx, my, mz.
// This is the 2nd derivative of the energy w.r.t. z.
func MFMKernel(mesh *d.Mesh, lift, tipsize float64) (kernel [3]*d.Slice) {

	const TipCharge = 1 / Mu0 // tip charge
	const Δ = 1e-9            // tip oscillation, take 2nd derivative over this distance
	util.AssertMsg(lift > 0, "MFM tip crashed into sample, please lift the new one higher")

	{ // Kernel mesh is 2x larger than the input, except in the PBC directions
		pbc := mesh.PBC()
		sz := padSize(mesh.Size(), pbc)
		cs := mesh.CellSize()
		mesh = d.NewMesh(sz[X], sz[Y], sz[Z], cs[X], cs[Y], cs[Z], pbc[:]...)
	}

	// Shorthand
	size := mesh.Size()
	pbc := mesh.PBC()
	cellsize := mesh.CellSize()
	volume := cellsize[X] * cellsize[Y] * cellsize[Z]
	fmt.Println("calculating MFM kernel")

	// Sanity check
	{
		util.Assert(size[Z] >= 1 && size[Y] >= 2 && size[X] >= 2)
		util.Assert(cellsize[X] > 0 && cellsize[Y] > 0 && cellsize[Z] > 0)
		util.AssertMsg(size[X]%2 == 0 && size[Y]%2 == 0, "Even kernel size needed")
		if size[Z] > 1 {
			util.AssertMsg(size[Z]%2 == 0, "Even kernel size needed")
		}
	}

	// Allocate the 3 kernel components (one per source direction).
	var K [3][][][]float32
	for i := 0; i < 3; i++ {
		kernel[i] = d.NewSlice(1, mesh.Size())
		K[i] = kernel[i].Scalars()
	}

	r1, r2 := kernelRanges(size, pbc)
	progress, progmax := 0, (1+r2[Y]-r1[Y])*(1+r2[Z]-r1[Z])

	for iz := r1[Z]; iz <= r2[Z]; iz++ {
		zw := wrap(iz, size[Z])
		z := float64(iz) * cellsize[Z]

		for iy := r1[Y]; iy <= r2[Y]; iy++ {
			yw := wrap(iy, size[Y])
			y := float64(iy) * cellsize[Y]
			progress++
			util.Progress(progress, progmax, "Calculating MFM kernel")

			for ix := r1[X]; ix <= r2[X]; ix++ {
				x := float64(ix) * cellsize[X]
				xw := wrap(ix, size[X])

				for s := 0; s < 3; s++ { // source index Ksxyz
					m := d.Vector{0, 0, 0}
					m[s] = 1

					var E [3]float64 // 3 energies for 2nd derivative

					for i := -1; i <= 1; i++ {
						I := float64(i)
						R := d.Vector{-x, -y, z - (lift + (I * Δ))}
						r := R.Len()
						B := R.Mul(TipCharge / (4 * math.Pi * r * r * r))

						R = d.Vector{-x, -y, z - (lift + tipsize + (I * Δ))}
						r = R.Len()
						B = B.Add(R.Mul(-TipCharge / (4 * math.Pi * r * r * r)))

						E[i+1] = B.Dot(m) * volume // i=-1 stored in E[0]
					}

					dFdz_tip := ((E[0] - E[1]) + (E[2] - E[1])) / (Δ * Δ) // dFz/dz = d2E/dz2

					K[s][zw][yw][xw] += float32(dFdz_tip) // += needed in case of PBC
				}
			}
		}
	}

	return kernel
}
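
A minimal call sketch for MFMKernel, written as if from inside the same package; the mesh dimensions, lift and tip size below are illustrative:

// Sketch: MFM kernel for a 2D mesh with 50 nm lift (illustrative values).
func exampleMFMKernel() [3]*d.Slice {
	mesh := d.NewMesh(128, 128, 1, 4e-9, 4e-9, 40e-9) // Nx, Ny, Nz and cell sizes in m
	return MFMKernel(mesh, 50e-9, 1e-3)               // lift and tip size in m
}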
Example #16
func (r *reader) readSlice() (s *data.Slice, info data.Meta, err error) {
	r.err = nil // clear previous error, if any
	magic := r.readString()
	if r.err != nil {
		return nil, data.Meta{}, r.err
	}
	if magic != MAGIC {
		r.err = fmt.Errorf("dump: bad magic number:%v", magic)
		return nil, data.Meta{}, r.err
	}
	nComp := r.readInt()
	size := [3]int{}
	size[2] = r.readInt() // backwards compatible coordinates!
	size[1] = r.readInt()
	size[0] = r.readInt()
	cell := [3]float64{}
	cell[2] = r.readFloat64()
	cell[1] = r.readFloat64()
	cell[0] = r.readFloat64()
	info.CellSize = cell

	info.MeshUnit = r.readString()
	info.Time = r.readFloat64()
	_ = r.readString() // time unit

	s = data.NewSlice(nComp, size)

	info.Name = r.readString()
	info.Unit = r.readString()
	precision := r.readUint64()
	util.AssertMsg(precision == 4, "only single precision supported")

	if r.err != nil {
		return
	}

	host := s.Tensors()
	ncomp := s.NComp()
	for c := 0; c < ncomp; c++ {
		for iz := 0; iz < size[2]; iz++ {
			for iy := 0; iy < size[1]; iy++ {
				for ix := 0; ix < size[0]; ix++ {
					host[c][iz][iy][ix] = r.readFloat32()
				}
			}
		}
	}

	// Check CRC
	var mycrc uint64 // checksum by this reader
	if r.crc != nil {
		mycrc = r.crc.Sum64()
	}
	storedcrc := r.readUint64() // checksum from data stream. 0 means not set
	if r.err != nil {
		return nil, data.Meta{}, r.err
	}
	if r.crc != nil {
		r.crc.Reset() // reset for next frame
	}
	if r.crc != nil && storedcrc != 0 && mycrc != storedcrc {
		r.err = fmt.Errorf("dump CRC error: expected %16x, got %16x", storedcrc, mycrc)
		return nil, data.Meta{}, r.err
	}

	return s, info, nil
}