// Read any OOMMF file, autodetect OVF1/OVF2 format func Read(in io.Reader) (s *data.Slice, meta data.Meta, err error) { //in := fullReader{bufio.NewReader(in_)} info := readHeader(in) n := info.Size c := info.StepSize if c == [3]float64{0, 0, 0} { c = [3]float64{1, 1, 1} // default (presumably unitless) cell size } data_ := data.NewSlice(info.NComp, n) format := strings.ToLower(info.Format) ovf := info.OVF switch { default: panic(fmt.Sprint("unknown format: OVF", ovf, " ", format)) case format == "text": readOVFDataText(in, data_) case format == "binary 4" && ovf == 1: readOVF1DataBinary4(in, data_) case format == "binary 8" && ovf == 1: readOVF1DataBinary8(in, data_) case format == "binary 4" && ovf == 2: readOVF2DataBinary4(in, data_) case format == "binary 8" && ovf == 2: readOVF2DataBinary8(in, data_) } return data_, data.Meta{Name: info.Title, Time: info.TotalTime, Unit: info.ValueUnit, CellSize: info.StepSize}, nil }
func TestCpy(t *testing.T) { Init(0) N0, N1, N2 := 2, 4, 32 N := N0 * N1 * N2 mesh := [3]int{N0, N1, N2} h1 := make([]float32, N) for i := range h1 { h1[i] = float32(i) } hs := sliceFromList([][]float32{h1}, mesh) d := NewSlice(1, mesh) data.Copy(d, hs) d2 := NewSlice(1, mesh) data.Copy(d2, d) h2 := data.NewSlice(1, mesh) data.Copy(h2, d2) res := h2.Host()[0] for i := range res { if res[i] != h1[i] { t.Fail() } } }
// Compares FFT-accelerated convolution against brute-force on sparse data. // This is not really needed but very quickly uncovers newly introduced bugs. func testConvolution(c *DemagConvolution, PBC [3]int, realKern [3][3]*data.Slice) { if PBC != [3]int{0, 0, 0} || prod(c.inputSize) > 512*512 { // the brute-force method does not work for pbc, // and for large simulations it gets just too slow. util.Log("skipping convolution self-test") return } //fmt.Print("convolution test ") inhost := data.NewSlice(3, c.inputSize) initConvTestInput(inhost.Vectors()) gpu := NewSlice(3, c.inputSize) defer gpu.Free() data.Copy(gpu, inhost) regions := NewBytes(prod(c.inputSize)) defer regions.Free() Bsat := NewSlice(1, [3]int{1, 1, 256}) defer Bsat.Free() Memset(Bsat, 1) BsatLUT := LUTPtr(Bsat.DevPtr(0)) vol := data.NilSlice(1, c.inputSize) c.Exec(gpu, gpu, vol, BsatLUT, regions) output := gpu.HostCopy() brute := data.NewSlice(3, c.inputSize) bruteConv(inhost.Vectors(), brute.Vectors(), realKern) a, b := output.Host(), brute.Host() err := float32(0) for c := range a { for i := range a[c] { if fabs(a[c][i]-b[c][i]) > err { err = fabs(a[c][i] - b[c][i]) } } } if err > CONV_TOLERANCE { util.Fatal("convolution self-test tolerance: ", err, " FAIL") } }
// Compares FFT-accelerated convolution against brute-force on sparse data. // This is not really needed but very quickly uncovers newly introduced bugs. func testConvolution(c *DemagConvolution, PBC [3]int, realKern [3][3]*data.Slice) { if PBC != [3]int{0, 0, 0} { // the brute-force method does not work for pbc. util.Log("skipping convolution self-test for PBC") return } util.Log("//convolution self-test...") inhost := data.NewSlice(3, c.inputSize) initConvTestInput(inhost.Vectors()) gpu := NewSlice(3, c.inputSize) defer gpu.Free() data.Copy(gpu, inhost) Msat := NewSlice(1, [3]int{1, 1, 256}) defer Msat.Free() Memset(Msat, 1) vol := data.NilSlice(1, c.inputSize) c.Exec(gpu, gpu, vol, ToMSlice(Msat)) output := gpu.HostCopy() brute := data.NewSlice(3, c.inputSize) bruteConv(inhost.Vectors(), brute.Vectors(), realKern) a, b := output.Host(), brute.Host() err := float32(0) for c := range a { for i := range a[c] { if fabs(a[c][i]-b[c][i]) > err { err = fabs(a[c][i] - b[c][i]) } } } if err > CONV_TOLERANCE { util.Fatal("convolution self-test tolerance: ", err, " FAIL") } }
func output3D(D [][]complex64, reduce func(complex64) float32, size [3]int, prefix string, deltaF float32) { const NCOMP = 1 for i := 0; i < len(D)/2; i++ { d := D[i] MHz := int((float32(i) * deltaF) / 1e6) fname := fmt.Sprintf("%sf%06dMHz.ovf", prefix, MHz) slice := data.NewSlice(NCOMP, size) doReduce(slice.Host()[0], d, reduce) meta := data.Meta{} log.Println(fname) f := httpfs.MustCreate(fname) oommf.WriteOVF2(f, slice, meta, "binary") f.Close() } }
func compensateLRSurfaceCharges(m *data.Mesh, mxLeft, mxRight float64, bsat float64) *data.Slice { h := data.NewSlice(3, m.Size()) H := h.Vectors() world := m.WorldSize() cell := m.CellSize() size := m.Size() q := cell[Z] * cell[Y] * bsat q1 := q * mxLeft q2 := q * (-mxRight) prog, maxProg := 0, (size[Z]+1)*(size[Y]+1) // surface loop (source) for I := 0; I < size[Z]; I++ { for J := 0; J < size[Y]; J++ { prog++ util.Progress(prog, maxProg, "removing surface charges") y := (float64(J) + 0.5) * cell[Y] z := (float64(I) + 0.5) * cell[Z] source1 := [3]float64{0, y, z} // left surface source source2 := [3]float64{world[X], y, z} // right surface source // volume loop (destination) for iz := range H[0] { for iy := range H[0][iz] { for ix := range H[0][iz][iy] { dst := [3]float64{ // destination coordinate (float64(ix) + 0.5) * cell[X], (float64(iy) + 0.5) * cell[Y], (float64(iz) + 0.5) * cell[Z]} h1 := hfield(q1, source1, dst) h2 := hfield(q2, source2, dst) // add this surface charges' field to grand total for c := 0; c < 3; c++ { H[c][iz][iy][ix] += float32(h1[c] + h2[c]) } } } } } } return h }
func main() { cuda.Init(0) defer Close() Nx := 512 Ny := 128 Nz := 1 cellsize := 5.0e-9 SetGridSize(Nx, Ny, Nz) thickness := 40e-9 //width := float64(Ny) * cellsize length := float64(Nx) * cellsize SetCellSize(cellsize, cellsize, thickness/float64(Nz)) mask := data.NewSlice(3, Mesh().Size()) wireX := -length * 0.45 //wireY := 0.0 wireZ := thickness * 5.0 for h := 0; h < 10; h++ { for i := 0; i < Nx; i++ { for j := 0; j < Ny; j++ { r := Index2Coord(i, j, 0) r = r.Sub(Vector(wireX+float64(h)*cellsize, r.Y(), wireZ)) B := Vector(0, 0, 0) current := Vector(0, 1, 0) B = r.Cross(current).Mul(Mu0 / (2 * math.Pi * math.Pow(r.Len(), 2))) mask.Set(0, i, j, 0, B.X()) mask.Set(1, i, j, 0, B.Y()) mask.Set(2, i, j, 0, B.Z()) } } } f, _ := os.OpenFile("antenna.ovf", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666) defer f.Close() oommf.WriteOVF2(f, mask, data.Meta{}, "binary 4") }
// Calculates the magnetostatic kernel by brute-force integration
// of magnetic charges over the faces and averages over cell volumes.
//
// inputSize is padded with padSize (using pbc) to obtain the kernel size.
// cellsize must be strictly positive in all directions, pbc non-negative and
// accuracy strictly positive (all checked with util.Assert). A larger accuracy
// gives a smaller maximum integration element and hence more integration points.
//
// The returned kernel[i][j] holds one single-component slice per tensor
// element. Only the upper triangle is computed; the lower triangle entries
// point to the same slices as their transposed counterparts.
// When the padded size has a single layer in Z, kernel[X][Z] and kernel[Y][Z]
// (and their transposes) are nil.
func CalcDemagKernel(inputSize, pbc [3]int, cellsize [3]float64, accuracy float64) (kernel [3][3]*data.Slice) {

	// Add zero-padding in non-PBC directions
	size := padSize(inputSize, pbc)

	// Sanity check
	{
		util.Assert(size[Z] > 0 && size[Y] > 0 && size[X] > 0)
		util.Assert(cellsize[X] > 0 && cellsize[Y] > 0 && cellsize[Z] > 0)
		util.Assert(pbc[X] >= 0 && pbc[Y] >= 0 && pbc[Z] >= 0)
		util.Assert(accuracy > 0)
	}

	// Allocate only upper diagonal part. The rest is symmetric due to reciprocity.
	// array[i][j] is the host view of kernel[i][j], indexed [z][y][x].
	var array [3][3][][][]float32
	for i := 0; i < 3; i++ {
		for j := i; j < 3; j++ {
			kernel[i][j] = data.NewSlice(1, size)
			array[i][j] = kernel[i][j].Scalars()
		}
	}

	// Field (destination) loop ranges
	r1, r2 := kernelRanges(size, pbc)

	// smallest cell dimension is our typical length scale
	L := cellsize[X]
	{
		if cellsize[Y] < L {
			L = cellsize[Y]
		}
		if cellsize[Z] < L {
			L = cellsize[Z]
		}
	}

	progress, progmax := 0, (1+(r2[Y]-r1[Y]))*(1+(r2[Z]-r1[Z])) // progress bar
	done := make(chan struct{}, 3)                              // parallel calculation of one component done?

	// Start brute integration
	// 9 nested loops, does that stress you out?
	// Fortunately, the 5 inner ones usually loop over just one element.

	// One goroutine per source direction s. Goroutine s only writes to
	// array[s][d] (d >= s), and only the s == 0 goroutine touches progress.
	for s := 0; s < 3; s++ { // source index Ksdxyz (parallelized over)
		go func(s int) {
			u, v, w := s, (s+1)%3, (s+2)%3 // u = direction of source (s), v & w are the orthogonal directions
			var (
				R, R2  [3]float64 // field and source cell center positions
				pole   [3]float64 // position of point charge on the surface
				points int        // counts used integration points
			)

			for z := r1[Z]; z <= r2[Z]; z++ {
				zw := wrap(z, size[Z])
				// skip one half, reconstruct from symmetry later
				// check on wrapped index instead of loop range so it also works for PBC
				if zw > size[Z]/2 {
					if s == 0 {
						// account for the whole skipped row of y iterations
						progress += (1 + (r2[Y] - r1[Y]))
					}
					continue
				}
				R[Z] = float64(z) * cellsize[Z]

				for y := r1[Y]; y <= r2[Y]; y++ {

					if s == 0 { // show progress of only one component
						progress++
						util.Progress(progress, progmax, "Calculating demag kernel")
					}

					yw := wrap(y, size[Y])
					if yw > size[Y]/2 {
						continue
					}
					R[Y] = float64(y) * cellsize[Y]

					for x := r1[X]; x <= r2[X]; x++ {
						xw := wrap(x, size[X])
						if xw > size[X]/2 {
							continue
						}
						R[X] = float64(x) * cellsize[X]

						// choose number of integration points depending on how far we are from source.
						dx, dy, dz := delta(x)*cellsize[X], delta(y)*cellsize[Y], delta(z)*cellsize[Z]
						d := math.Sqrt(dx*dx + dy*dy + dz*dz)
						if d == 0 {
							d = L // zero distance: fall back to the smallest cell dimension
						}
						maxSize := d / accuracy // maximum acceptable integration size

						// number of integration points per direction, rounded, at least 1
						nv := int(math.Max(cellsize[v]/maxSize, 1) + 0.5)
						nw := int(math.Max(cellsize[w]/maxSize, 1) + 0.5)
						nx := int(math.Max(cellsize[X]/maxSize, 1) + 0.5)
						ny := int(math.Max(cellsize[Y]/maxSize, 1) + 0.5)
						nz := int(math.Max(cellsize[Z]/maxSize, 1) + 0.5)
						// Stagger source and destination grids.
						// Massively improves accuracy, see note.
						nv *= 2
						nw *= 2

						util.Assert(nv > 0 && nw > 0 && nx > 0 && ny > 0 && nz > 0)

						scale := 1 / float64(nv*nw*nx*ny*nz) // weight of a single (source point, destination point) pair
						surface := cellsize[v] * cellsize[w] // the two directions perpendicular to direction s
						charge := surface * scale
						pu1 := cellsize[u] / 2. // positive pole center
						pu2 := -pu1             // negative pole center

						// Do surface integral over source cell, accumulate in B
						var B [3]float64
						for i := 0; i < nv; i++ {
							pv := -(cellsize[v] / 2.) + cellsize[v]/float64(2*nv) + float64(i)*(cellsize[v]/float64(nv))
							pole[v] = pv
							for j := 0; j < nw; j++ {
								pw := -(cellsize[w] / 2.) + cellsize[w]/float64(2*nw) + float64(j)*(cellsize[w]/float64(nw))
								pole[w] = pw

								// Do volume integral over destination cell
								for α := 0; α < nx; α++ {
									rx := R[X] - cellsize[X]/2 + cellsize[X]/float64(2*nx) + (cellsize[X]/float64(nx))*float64(α)

									for β := 0; β < ny; β++ {
										ry := R[Y] - cellsize[Y]/2 + cellsize[Y]/float64(2*ny) + (cellsize[Y]/float64(ny))*float64(β)

										for γ := 0; γ < nz; γ++ {
											rz := R[Z] - cellsize[Z]/2 + cellsize[Z]/float64(2*nz) + (cellsize[Z]/float64(nz))*float64(γ)
											points++

											// contribution of the positive pole
											pole[u] = pu1
											R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
											r := math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
											qr := charge / (4 * math.Pi * r * r * r)
											bx := R2[X] * qr
											by := R2[Y] * qr
											bz := R2[Z] * qr

											// contribution of the negative pole
											pole[u] = pu2
											R2[X], R2[Y], R2[Z] = rx-pole[X], ry-pole[Y], rz-pole[Z]
											r = math.Sqrt(R2[X]*R2[X] + R2[Y]*R2[Y] + R2[Z]*R2[Z])
											qr = -charge / (4 * math.Pi * r * r * r)
											B[X] += (bx + R2[X]*qr) // addition ordered for accuracy
											B[Y] += (by + R2[Y]*qr)
											B[Z] += (bz + R2[Z]*qr)

										}
									}
								}
							}
						}
						for d := s; d < 3; d++ { // destination index Ksdxyz
							array[s][d][zw][yw][xw] += float32(B[d]) // += needed in case of PBC
						}
					}
				}
			}
			done <- struct{}{} // notify parallel computation of this component is done
		}(s)
	}
	// wait for all 3 components to finish
	<-done
	<-done
	<-done

	// Reconstruct skipped parts from symmetry (X)
	// Mirroring in X flips the sign of the XY and XZ elements.
	for z := 0; z < size[Z]; z++ {
		for y := 0; y < size[Y]; y++ {
			for x := size[X]/2 + 1; x < size[X]; x++ {
				x2 := size[X] - x
				array[X][X][z][y][x] = array[X][X][z][y][x2]
				array[X][Y][z][y][x] = -array[X][Y][z][y][x2]
				array[X][Z][z][y][x] = -array[X][Z][z][y][x2]
				array[Y][Y][z][y][x] = array[Y][Y][z][y][x2]
				array[Y][Z][z][y][x] = array[Y][Z][z][y][x2]
				array[Z][Z][z][y][x] = array[Z][Z][z][y][x2]
			}
		}
	}

	// Reconstruct skipped parts from symmetry (Y)
	// Mirroring in Y flips the sign of the XY and YZ elements.
	for z := 0; z < size[Z]; z++ {
		for y := size[Y]/2 + 1; y < size[Y]; y++ {
			y2 := size[Y] - y
			for x := 0; x < size[X]; x++ {
				array[X][X][z][y][x] = array[X][X][z][y2][x]
				array[X][Y][z][y][x] = -array[X][Y][z][y2][x]
				array[X][Z][z][y][x] = array[X][Z][z][y2][x]
				array[Y][Y][z][y][x] = array[Y][Y][z][y2][x]
				array[Y][Z][z][y][x] = -array[Y][Z][z][y2][x]
				array[Z][Z][z][y][x] = array[Z][Z][z][y2][x]
			}
		}
	}

	// Reconstruct skipped parts from symmetry (Z)
	// Mirroring in Z flips the sign of the XZ and YZ elements.
	for z := size[Z]/2 + 1; z < size[Z]; z++ {
		z2 := size[Z] - z
		for y := 0; y < size[Y]; y++ {
			for x := 0; x < size[X]; x++ {
				array[X][X][z][y][x] = array[X][X][z2][y][x]
				array[X][Y][z][y][x] = array[X][Y][z2][y][x]
				array[X][Z][z][y][x] = -array[X][Z][z2][y][x]
				array[Y][Y][z][y][x] = array[Y][Y][z2][y][x]
				array[Y][Z][z][y][x] = -array[Y][Z][z2][y][x]
				array[Z][Z][z][y][x] = array[Z][Z][z2][y][x]
			}
		}
	}

	// for 2D these elements are zero:
	if size[Z] == 1 {
		kernel[X][Z] = nil
		kernel[Y][Z] = nil
	}
	// make result symmetric for tools that expect it so.
	kernel[Y][X] = kernel[X][Y]
	kernel[Z][X] = kernel[X][Z]
	kernel[Z][Y] = kernel[Y][Z]
	return kernel
}
func (c *DemagConvolution) init(realKern [3][3]*data.Slice) { // init device buffers // 2D re-uses fftBuf[X] as fftBuf[Z], 3D needs all 3 fftBufs. nc := fftR2COutputSizeFloats(c.realKernSize) c.fftCBuf[X] = NewSlice(1, nc) c.fftCBuf[Y] = NewSlice(1, nc) if c.is2D() { c.fftCBuf[Z] = c.fftCBuf[X] } else { c.fftCBuf[Z] = NewSlice(1, nc) } // Real buffer shares storage with Complex buffer for i := 0; i < 3; i++ { c.fftRBuf[i] = data.SliceFromPtrs(c.realKernSize, data.GPUMemory, []unsafe.Pointer{c.fftCBuf[i].DevPtr(0)}) } // init FFT plans c.fwPlan = newFFT3DR2C(c.realKernSize[X], c.realKernSize[Y], c.realKernSize[Z]) c.bwPlan = newFFT3DC2R(c.realKernSize[X], c.realKernSize[Y], c.realKernSize[Z]) // init FFT kernel // logic size of FFT(kernel): store real parts only c.fftKernLogicSize = fftR2COutputSizeFloats(c.realKernSize) util.Assert(c.fftKernLogicSize[X]%2 == 0) c.fftKernLogicSize[X] /= 2 // physical size of FFT(kernel): store only non-redundant part exploiting Y, Z mirror symmetry // X mirror symmetry already exploited: FFT(kernel) is purely real. 
physKSize := [3]int{c.fftKernLogicSize[X], c.fftKernLogicSize[Y]/2 + 1, c.fftKernLogicSize[Z]/2 + 1} output := c.fftCBuf[0] input := c.fftRBuf[0] fftKern := data.NewSlice(1, physKSize) kfull := data.NewSlice(1, output.Size()) // not yet exploiting symmetry kfulls := kfull.Scalars() kCSize := physKSize kCSize[X] *= 2 // size of kernel after removing Y,Z redundant parts, but still complex kCmplx := data.NewSlice(1, kCSize) // not yet exploiting X symmetry kc := kCmplx.Scalars() for i := 0; i < 3; i++ { for j := i; j < 3; j++ { // upper triangular part if realKern[i][j] != nil { // ignore 0's // FW FFT data.Copy(input, realKern[i][j]) c.fwPlan.ExecAsync(input, output) data.Copy(kfull, output) // extract non-redundant part (Y,Z symmetry) for iz := 0; iz < kCSize[Z]; iz++ { for iy := 0; iy < kCSize[Y]; iy++ { for ix := 0; ix < kCSize[X]; ix++ { kc[iz][iy][ix] = kfulls[iz][iy][ix] } } } // extract real parts (X symmetry) scaleRealParts(fftKern, kCmplx, 1/float32(c.fwPlan.InputLen())) c.kern[i][j] = GPUCopy(fftKern) } } } }
// NewScalarMask returns a new single-component slice of size Nx x Ny x Nz.
func NewScalarMask(Nx, Ny, Nz int) *data.Slice {
	size := [3]int{Nx, Ny, Nz}
	return data.NewSlice(1, size)
}
// NewVectorMask returns a new three-component slice of size Nx x Ny x Nz.
func NewVectorMask(Nx, Ny, Nz int) *data.Slice {
	size := [3]int{Nx, Ny, Nz}
	return data.NewSlice(3, size)
}
// Returns a new new slice (3D array) with given number of components and size. func NewSlice(ncomp, Nx, Ny, Nz int) *data.Slice { return data.NewSlice(ncomp, [3]int{Nx, Ny, Nz}) }
func (geometry *geom) setGeom(s Shape) { SetBusy(true) defer SetBusy(false) if s == nil { // TODO: would be nice not to save volume if entirely filled s = universe } geometry.shape = s if geometry.Gpu().IsNil() { geometry.buffer = cuda.NewSlice(1, geometry.Mesh().Size()) } host := data.NewSlice(1, geometry.Gpu().Size()) array := host.Scalars() V := host v := array n := geometry.Mesh().Size() c := geometry.Mesh().CellSize() cx, cy, cz := c[X], c[Y], c[Z] progress, progmax := 0, n[Y]*n[Z] var ok bool for iz := 0; iz < n[Z]; iz++ { for iy := 0; iy < n[Y]; iy++ { progress++ util.Progress(progress, progmax, "Initializing geometry") for ix := 0; ix < n[X]; ix++ { r := Index2Coord(ix, iy, iz) x0, y0, z0 := r[X], r[Y], r[Z] // check if center and all vertices lie inside or all outside allIn, allOut := true, true if s(x0, y0, z0) { allOut = false } else { allIn = false } if edgeSmooth != 0 { // center is sufficient if we're not really smoothing for _, Δx := range []float64{-cx / 2, cx / 2} { for _, Δy := range []float64{-cy / 2, cy / 2} { for _, Δz := range []float64{-cz / 2, cz / 2} { if s(x0+Δx, y0+Δy, z0+Δz) { // inside allOut = false } else { allIn = false } } } } } switch { case allIn: v[iz][iy][ix] = 1 ok = true case allOut: v[iz][iy][ix] = 0 default: v[iz][iy][ix] = geometry.cellVolume(ix, iy, iz) ok = ok || (v[iz][iy][ix] != 0) } } } } if !ok { util.Fatal("SetGeom: geometry completely empty") } data.Copy(geometry.buffer, V) // M inside geom but previously outside needs to be re-inited needupload := false geomlist := host.Host()[0] mhost := M.Buffer().HostCopy() m := mhost.Host() rng := rand.New(rand.NewSource(0)) for i := range m[0] { if geomlist[i] != 0 { mx, my, mz := m[X][i], m[Y][i], m[Z][i] if mx == 0 && my == 0 && mz == 0 { needupload = true rnd := randomDir(rng) m[X][i], m[Y][i], m[Z][i] = float32(rnd[X]), float32(rnd[Y]), float32(rnd[Z]) } } } if needupload { data.Copy(M.Buffer(), mhost) } M.normalize() // removes m outside vol }
// rescale and download quantity, save in rescaleBuf func (ren *render) download() { InjectAndWait(func() { if ren.quant == nil { // not yet set, default = m ren.quant = &M } quant := ren.quant size := quant.Mesh().Size() // don't slice out of bounds renderLayer := ren.layer if renderLayer >= size[Z] { renderLayer = size[Z] - 1 } if renderLayer < 0 { renderLayer = 0 } // scaling sanity check if ren.scale < 1 { ren.scale = 1 } if ren.scale > maxScale { ren.scale = maxScale } // Don't render too large images or we choke for size[X]/ren.scale > maxImgSize { ren.scale++ } for size[Y]/ren.scale > maxImgSize { ren.scale++ } for i := range size { size[i] /= ren.scale if size[i] == 0 { size[i] = 1 } } size[Z] = 1 // selects one layer // make sure buffers are there if ren.imgBuf.Size() != size { ren.imgBuf = data.NewSlice(3, size) // always 3-comp, may be re-used } buf, r := quant.Slice() if r { defer cuda.Recycle(buf) } if !buf.GPUAccess() { ren.imgBuf = Download(quant) // fallback (no zoom) return } // make sure buffers are there (in CUDA context) if ren.rescaleBuf.Size() != size { ren.rescaleBuf.Free() ren.rescaleBuf = cuda.NewSlice(1, size) } for c := 0; c < quant.NComp(); c++ { cuda.Resize(ren.rescaleBuf, buf.Comp(c), renderLayer) data.Copy(ren.imgBuf.Comp(c), ren.rescaleBuf) } }) }
// Kernel for the vertical derivative of the force on an MFM tip due to mx, my, mz. // This is the 2nd derivative of the energy w.r.t. z. func MFMKernel(mesh *d.Mesh, lift, tipsize float64) (kernel [3]*d.Slice) { const TipCharge = 1 / Mu0 // tip charge const Δ = 1e-9 // tip oscillation, take 2nd derivative over this distance util.AssertMsg(lift > 0, "MFM tip crashed into sample, please lift the new one higher") { // Kernel mesh is 2x larger than input, instead in case of PBC pbc := mesh.PBC() sz := padSize(mesh.Size(), pbc) cs := mesh.CellSize() mesh = d.NewMesh(sz[X], sz[Y], sz[Z], cs[X], cs[Y], cs[Z], pbc[:]...) } // Shorthand size := mesh.Size() pbc := mesh.PBC() cellsize := mesh.CellSize() volume := cellsize[X] * cellsize[Y] * cellsize[Z] fmt.Println("calculating MFM kernel") // Sanity check { util.Assert(size[Z] >= 1 && size[Y] >= 2 && size[X] >= 2) util.Assert(cellsize[X] > 0 && cellsize[Y] > 0 && cellsize[Z] > 0) util.AssertMsg(size[X]%2 == 0 && size[Y]%2 == 0, "Even kernel size needed") if size[Z] > 1 { util.AssertMsg(size[Z]%2 == 0, "Even kernel size needed") } } // Allocate only upper diagonal part. The rest is symmetric due to reciprocity. 
var K [3][][][]float32 for i := 0; i < 3; i++ { kernel[i] = d.NewSlice(1, mesh.Size()) K[i] = kernel[i].Scalars() } r1, r2 := kernelRanges(size, pbc) progress, progmax := 0, (1+r2[Y]-r1[Y])*(1+r2[Z]-r1[Z]) for iz := r1[Z]; iz <= r2[Z]; iz++ { zw := wrap(iz, size[Z]) z := float64(iz) * cellsize[Z] for iy := r1[Y]; iy <= r2[Y]; iy++ { yw := wrap(iy, size[Y]) y := float64(iy) * cellsize[Y] progress++ util.Progress(progress, progmax, "Calculating MFM kernel") for ix := r1[X]; ix <= r2[X]; ix++ { x := float64(ix) * cellsize[X] xw := wrap(ix, size[X]) for s := 0; s < 3; s++ { // source index Ksxyz m := d.Vector{0, 0, 0} m[s] = 1 var E [3]float64 // 3 energies for 2nd derivative for i := -1; i <= 1; i++ { I := float64(i) R := d.Vector{-x, -y, z - (lift + (I * Δ))} r := R.Len() B := R.Mul(TipCharge / (4 * math.Pi * r * r * r)) R = d.Vector{-x, -y, z - (lift + tipsize + (I * Δ))} r = R.Len() B = B.Add(R.Mul(-TipCharge / (4 * math.Pi * r * r * r))) E[i+1] = B.Dot(m) * volume // i=-1 stored in E[0] } dFdz_tip := ((E[0] - E[1]) + (E[2] - E[1])) / (Δ * Δ) // dFz/dz = d2E/dz2 K[s][zw][yw][xw] += float32(dFdz_tip) // += needed in case of PBC } } } } return kernel }
func (r *reader) readSlice() (s *data.Slice, info data.Meta, err error) { r.err = nil // clear previous error, if any magic := r.readString() if r.err != nil { return nil, data.Meta{}, r.err } if magic != MAGIC { r.err = fmt.Errorf("dump: bad magic number:%v", magic) return nil, data.Meta{}, r.err } nComp := r.readInt() size := [3]int{} size[2] = r.readInt() // backwards compatible coordinates! size[1] = r.readInt() size[0] = r.readInt() cell := [3]float64{} cell[2] = r.readFloat64() cell[1] = r.readFloat64() cell[0] = r.readFloat64() info.CellSize = cell info.MeshUnit = r.readString() info.Time = r.readFloat64() _ = r.readString() // time unit s = data.NewSlice(nComp, size) info.Name = r.readString() info.Unit = r.readString() precission := r.readUint64() util.AssertMsg(precission == 4, "only single precission supported") if r.err != nil { return } host := s.Tensors() ncomp := s.NComp() for c := 0; c < ncomp; c++ { for iz := 0; iz < size[2]; iz++ { for iy := 0; iy < size[1]; iy++ { for ix := 0; ix < size[0]; ix++ { host[c][iz][iy][ix] = r.readFloat32() } } } } // Check CRC var mycrc uint64 // checksum by this reader if r.crc != nil { mycrc = r.crc.Sum64() } storedcrc := r.readUint64() // checksum from data stream. 0 means not set if r.err != nil { return nil, data.Meta{}, r.err } if r.crc != nil { r.crc.Reset() // reset for next frame } if r.crc != nil && storedcrc != 0 && mycrc != storedcrc { r.err = fmt.Errorf("dump CRC error: expected %16x, got %16x", storedcrc, mycrc) return nil, data.Meta{}, r.err } return s, info, nil }