// resize adapts the magnetization buffer to the current mesh,
// re-sampling the existing data onto the new grid.
func (m *magnetization) resize() {
    backup := m.Buffer().HostCopy()
    s2 := Mesh().Size()
    resized := data.Resample(backup, s2)
    m.buffer_.Free()
    m.buffer_ = cuda.NewSlice(VECTOR, s2)
    data.Copy(m.buffer_, resized)
}
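// resampleNN is an illustrative, hypothetical helper (not part of this
// package): nearest-neighbour resampling of one scalar component by index
// scaling, a minimal sketch of what data.Resample is assumed to do above.
// E.g. resampleNN([][][]float32{{{1, 2}, {3, 4}}}, 1, 4, 4)[0][0]
// yields [1 1 2 2].
func resampleNN(src [][][]float32, mz, my, mx int) [][][]float32 {
    nz, ny, nx := len(src), len(src[0]), len(src[0][0])
    dst := make([][][]float32, mz)
    for k := range dst {
        dst[k] = make([][]float32, my)
        for j := range dst[k] {
            dst[k][j] = make([]float32, mx)
            for i := range dst[k][j] {
                // pick the proportionally located source cell
                dst[k][j][i] = src[k*nz/mz][j*ny/my][i*nx/mx]
            }
        }
    }
    return dst
}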
func (b *thermField) update() {
    // We need to fix the time step here because the solver will not yet
    // have done it before the first step. Making FixDt an lvalue that sets
    // Dt_si on change would be cleaner.
    if FixDt != 0 {
        Dt_si = FixDt
    }

    if b.generator == 0 {
        b.generator = curand.CreateGenerator(curand.PSEUDO_DEFAULT)
        b.generator.SetSeed(b.seed)
    }

    if b.noise == nil {
        b.noise = cuda.NewSlice(b.NComp(), b.Mesh().Size())
        // when noise was (re-)allocated it's invalid for sure.
        B_therm.step = -1
        B_therm.dt = -1
    }

    if Temp.isZero() {
        cuda.Memset(b.noise, 0, 0, 0)
        b.step = NSteps
        b.dt = Dt_si
        return
    }

    // keep the noise constant during a time step
    if NSteps == b.step && Dt_si == b.dt {
        return
    }

    if FixDt == 0 {
        util.Fatal("Finite temperature requires a fixed time step. Set FixDt != 0.")
    }

    N := Mesh().NCell()
    k2_VgammaDt := 2 * mag.Kb / (GammaLL * cellVolume() * Dt_si)
    noise := cuda.Buffer(1, Mesh().Size())
    defer cuda.Recycle(noise)

    const mean = 0
    const stddev = 1
    dst := b.noise
    ms := Msat.MSlice()
    defer ms.Recycle()
    temp := Temp.MSlice()
    defer temp.Recycle()
    alpha := Alpha.MSlice()
    defer alpha.Recycle()
    for i := 0; i < 3; i++ {
        b.generator.GenerateNormal(uintptr(noise.DevPtr(0)), int64(N), mean, stddev)
        cuda.SetTemperature(dst.Comp(i), noise, k2_VgammaDt, ms, temp, alpha)
    }

    b.step = NSteps
    b.dt = Dt_si
}
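// For reference: reading off k2_VgammaDt and the arguments passed to
// cuda.SetTemperature above, the per-cell noise amplitude is (up to the
// kernel's exact factor grouping, which is not shown here and is assumed)
// Brown's thermal-field expression
//
//     B_therm = eta * sqrt(2 * alpha * kB * T / (GammaLL * Msat * cellVolume * Dt_si))
//
// with eta a standard normal random vector regenerated once per solver
// step. The 1/sqrt(Dt_si) scaling is why a fixed time step is mandatory:
// an adaptive step would change the noise statistics mid-run.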
// allocate storage (not done by init, as mesh size may not yet be known then)
func (m *magnetization) alloc() {
    m.buffer_ = cuda.NewSlice(3, m.Mesh().Size())
    m.Set(RandomMag()) // sane starting config
}
func (rk *RK23) Step() {
    m := M.Buffer()
    size := m.Size()

    if FixDt != 0 {
        Dt_si = FixDt
    }

    // upon resize: remove wrongly sized k1
    if rk.k1.Size() != m.Size() {
        rk.Free()
    }

    // first step ever: one-time k1 init and eval
    if rk.k1 == nil {
        rk.k1 = cuda.NewSlice(3, size)
        torqueFn(rk.k1)
    }

    // FSAL cannot be used with temperature: the random thermal field
    // changes every step, so k1 must be re-evaluated.
    if !Temp.isZero() {
        torqueFn(rk.k1)
    }

    t0 := Time
    // backup magnetization
    m0 := cuda.Buffer(3, size)
    defer cuda.Recycle(m0)
    data.Copy(m0, m)

    k2, k3, k4 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
    defer cuda.Recycle(k2)
    defer cuda.Recycle(k3)
    defer cuda.Recycle(k4)

    h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

    // there is no explicit stage 1: k1 is the last torque of the previous step

    // stage 2
    Time = t0 + (1./2.)*Dt_si
    cuda.Madd2(m, m, rk.k1, 1, (1./2.)*h) // m = m*1 + k1*h/2
    M.normalize()
    torqueFn(k2)

    // stage 3
    Time = t0 + (3./4.)*Dt_si
    cuda.Madd2(m, m0, k2, 1, (3./4.)*h) // m = m0*1 + k2*3h/4
    M.normalize()
    torqueFn(k3)

    // 3rd order solution
    madd4(m, m0, rk.k1, k2, k3, 1, (2./9.)*h, (1./3.)*h, (4./9.)*h)
    M.normalize()

    // error estimate
    Time = t0 + Dt_si
    torqueFn(k4)
    Err := k2 // re-use k2 as error
    // difference of 3rd and 2nd order torques, without explicitly storing them first
    madd4(Err, rk.k1, k2, k3, k4, (7./24.)-(2./9.), (1./4.)-(1./3.), (1./3.)-(4./9.), (1./8.)-(0.))

    // determine error
    err := cuda.MaxVecNorm(Err) * float64(h)

    // adjust next time step
    if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // MinDt check to avoid an infinite loop
        // step OK
        setLastErr(err)
        setMaxTorque(k4)
        NSteps++
        Time = t0 + Dt_si
        adaptDt(math.Pow(MaxErr/err, 1./3.))
        data.Copy(rk.k1, k4) // FSAL
    } else {
        // undo bad step
        //util.Println("Bad step at t=", t0, ", err=", err)
        util.Assert(FixDt == 0)
        Time = t0
        data.Copy(m, m0)
        NUndone++
        adaptDt(math.Pow(MaxErr/err, 1./4.))
    }
}
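// For reference: the stage times and weights above form the
// Bogacki–Shampine 3(2) pair. Its Butcher tableau (b advances the
// solution, b* is used only for the embedded error estimate):
//
//      0  |
//     1/2 | 1/2
//     3/4 |  0    3/4
//      1  | 2/9   1/3   4/9
//     ----+---------------------
//      b  | 2/9   1/3   4/9   0
//      b* | 7/24  1/4   1/3  1/8
//
// The last stage is evaluated at the accepted solution, so k4 equals k1
// of the next step; copying it into rk.k1 on success implements the
// "first same as last" (FSAL) property.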
func (geometry *geom) setGeom(s Shape) {
    SetBusy(true)
    defer SetBusy(false)

    if s == nil {
        s = universe // TODO: would be nice not to save volume if entirely filled
    }

    geometry.shape = s
    if geometry.Gpu().IsNil() {
        geometry.buffer = cuda.NewSlice(1, geometry.Mesh().Size())
    }

    host := data.NewSlice(1, geometry.Gpu().Size())
    array := host.Scalars()
    V := host
    v := array
    n := geometry.Mesh().Size()
    c := geometry.Mesh().CellSize()
    cx, cy, cz := c[X], c[Y], c[Z]

    progress, progmax := 0, n[Y]*n[Z]

    var ok bool
    for iz := 0; iz < n[Z]; iz++ {
        for iy := 0; iy < n[Y]; iy++ {
            progress++
            util.Progress(progress, progmax, "Initializing geometry")
            for ix := 0; ix < n[X]; ix++ {
                r := Index2Coord(ix, iy, iz)
                x0, y0, z0 := r[X], r[Y], r[Z]

                // check if center and all vertices lie inside or all outside
                allIn, allOut := true, true
                if s(x0, y0, z0) {
                    allOut = false
                } else {
                    allIn = false
                }

                if edgeSmooth != 0 { // center is sufficient if we're not really smoothing
                    for _, Δx := range []float64{-cx / 2, cx / 2} {
                        for _, Δy := range []float64{-cy / 2, cy / 2} {
                            for _, Δz := range []float64{-cz / 2, cz / 2} {
                                if s(x0+Δx, y0+Δy, z0+Δz) { // inside
                                    allOut = false
                                } else {
                                    allIn = false
                                }
                            }
                        }
                    }
                }

                switch {
                case allIn:
                    v[iz][iy][ix] = 1
                    ok = true
                case allOut:
                    v[iz][iy][ix] = 0
                default:
                    v[iz][iy][ix] = geometry.cellVolume(ix, iy, iz)
                    ok = ok || (v[iz][iy][ix] != 0)
                }
            }
        }
    }

    if !ok {
        util.Fatal("SetGeom: geometry completely empty")
    }

    data.Copy(geometry.buffer, V)

    // M inside geom but previously outside needs to be re-inited
    needupload := false
    geomlist := host.Host()[0]
    mhost := M.Buffer().HostCopy()
    m := mhost.Host()
    rng := rand.New(rand.NewSource(0))
    for i := range m[0] {
        if geomlist[i] != 0 {
            mx, my, mz := m[X][i], m[Y][i], m[Z][i]
            if mx == 0 && my == 0 && mz == 0 {
                needupload = true
                rnd := randomDir(rng)
                m[X][i], m[Y][i], m[Z][i] = float32(rnd[X]), float32(rnd[Y]), float32(rnd[Z])
            }
        }
    }
    if needupload {
        data.Copy(M.Buffer(), mhost)
    }

    M.normalize() // removes m outside vol
}
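// cellStatus is an illustrative, hypothetical helper (not part of this
// package) distilling the corner-sampling test above: a cell counts as
// fully inside only if its center and all eight corners satisfy the shape
// function s, fully outside only if none of them do; anything in between
// is a boundary cell that receives a fractional fill value. For a unit
// sphere, cellStatus(func(x, y, z float64) bool { return x*x+y*y+z*z <= 1 },
// 0.9, 0, 0, 0.2, 0.2, 0.2) reports a boundary cell.
func cellStatus(s func(x, y, z float64) bool, x0, y0, z0, cx, cy, cz float64) (allIn, allOut bool) {
    allIn, allOut = true, true
    sample := func(x, y, z float64) {
        if s(x, y, z) {
            allOut = false
        } else {
            allIn = false
        }
    }
    sample(x0, y0, z0) // center
    for _, dx := range []float64{-cx / 2, cx / 2} {
        for _, dy := range []float64{-cy / 2, cy / 2} {
            for _, dz := range []float64{-cz / 2, cz / 2} {
                sample(x0+dx, y0+dy, z0+dz) // corner
            }
        }
    }
    return allIn, allOut
}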
// rescale and download quantity, save in rescaleBuf
func (ren *render) download() {
    InjectAndWait(func() {
        if ren.quant == nil { // not yet set, default = m
            ren.quant = &M
        }
        quant := ren.quant

        size := quant.Mesh().Size()

        // don't slice out of bounds
        renderLayer := ren.layer
        if renderLayer >= size[Z] {
            renderLayer = size[Z] - 1
        }
        if renderLayer < 0 {
            renderLayer = 0
        }

        // scaling sanity check
        if ren.scale < 1 {
            ren.scale = 1
        }
        if ren.scale > maxScale {
            ren.scale = maxScale
        }
        // don't render too large images or we choke
        for size[X]/ren.scale > maxImgSize {
            ren.scale++
        }
        for size[Y]/ren.scale > maxImgSize {
            ren.scale++
        }

        for i := range size {
            size[i] /= ren.scale
            if size[i] == 0 {
                size[i] = 1
            }
        }
        size[Z] = 1 // selects one layer

        // make sure buffers are there
        if ren.imgBuf.Size() != size {
            ren.imgBuf = data.NewSlice(3, size) // always 3-comp, may be re-used
        }

        buf, r := quant.Slice()
        if r {
            defer cuda.Recycle(buf)
        }
        if !buf.GPUAccess() {
            ren.imgBuf = Download(quant) // fallback (no zoom)
            return
        }

        // make sure buffers are there (in CUDA context)
        if ren.rescaleBuf.Size() != size {
            ren.rescaleBuf.Free()
            ren.rescaleBuf = cuda.NewSlice(1, size)
        }

        for c := 0; c < quant.NComp(); c++ {
            cuda.Resize(ren.rescaleBuf, buf.Comp(c), renderLayer)
            data.Copy(ren.imgBuf.Comp(c), ren.rescaleBuf)
        }
    })
}
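// minScale is an illustrative, hypothetical helper (not part of this
// package) giving a closed form for the bump loops in download above: the
// smallest s >= s0 with n/s <= max under integer division. E.g.
// minScale(2048, 1024, 1) == 2 while minScale(1024, 1024, 1) == 1.
func minScale(n, max, s0 int) int {
    s := n/(max+1) + 1 // smallest s with n/s <= max
    if s < s0 {
        s = s0
    }
    return s
}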
func (rk *RK45DP) Step() {
    m := M.Buffer()
    size := m.Size()

    if FixDt != 0 {
        Dt_si = FixDt
    }

    // upon resize: remove wrongly sized k1
    if rk.k1.Size() != m.Size() {
        rk.Free()
    }

    // first step ever: one-time k1 init and eval
    if rk.k1 == nil {
        rk.k1 = cuda.NewSlice(3, size)
        torqueFn(rk.k1)
    }

    // FSAL cannot be used with finite temperature: the random thermal
    // field changes every step, so k1 must be re-evaluated.
    if !Temp.isZero() {
        torqueFn(rk.k1)
    }

    t0 := Time
    // backup magnetization
    m0 := cuda.Buffer(3, size)
    defer cuda.Recycle(m0)
    data.Copy(m0, m)

    k2, k3, k4, k5, k6 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
    defer cuda.Recycle(k2)
    defer cuda.Recycle(k3)
    defer cuda.Recycle(k4)
    defer cuda.Recycle(k5)
    defer cuda.Recycle(k6)
    // k2 will be re-used as k7

    h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

    // there is no explicit stage 1: k1 is the last torque of the previous step

    // stage 2
    Time = t0 + (1./5.)*Dt_si
    cuda.Madd2(m, m, rk.k1, 1, (1./5.)*h) // m = m*1 + k1*h/5
    M.normalize()
    torqueFn(k2)

    // stage 3
    Time = t0 + (3./10.)*Dt_si
    cuda.Madd3(m, m0, rk.k1, k2, 1, (3./40.)*h, (9./40.)*h)
    M.normalize()
    torqueFn(k3)

    // stage 4
    Time = t0 + (4./5.)*Dt_si
    madd4(m, m0, rk.k1, k2, k3, 1, (44./45.)*h, (-56./15.)*h, (32./9.)*h)
    M.normalize()
    torqueFn(k4)

    // stage 5
    Time = t0 + (8./9.)*Dt_si
    madd5(m, m0, rk.k1, k2, k3, k4, 1, (19372./6561.)*h, (-25360./2187.)*h, (64448./6561.)*h, (-212./729.)*h)
    M.normalize()
    torqueFn(k5)

    // stage 6
    Time = t0 + (1.)*Dt_si
    madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (9017./3168.)*h, (-355./33.)*h, (46732./5247.)*h, (49./176.)*h, (-5103./18656.)*h)
    M.normalize()
    torqueFn(k6)

    // stage 7: 5th order solution
    Time = t0 + (1.)*Dt_si
    madd6(m, m0, rk.k1, k3, k4, k5, k6, 1, (35./384.)*h, (500./1113.)*h, (125./192.)*h, (-2187./6784.)*h, (11./84.)*h) // no k2
    M.normalize()
    k7 := k2     // re-use k2
    torqueFn(k7) // next torque if OK

    // error estimate
    Err := cuda.Buffer(3, size) // fresh buffer for the error estimate
    defer cuda.Recycle(Err)
    madd6(Err, rk.k1, k3, k4, k5, k6, k7, (35./384.)-(5179./57600.), (500./1113.)-(7571./16695.), (125./192.)-(393./640.), (-2187./6784.)-(-92097./339200.), (11./84.)-(187./2100.), (0.)-(1./40.))

    // determine error
    err := cuda.MaxVecNorm(Err) * float64(h)

    // adjust next time step
    if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // MinDt check to avoid an infinite loop
        // step OK
        setLastErr(err)
        setMaxTorque(k7)
        NSteps++
        Time = t0 + Dt_si
        adaptDt(math.Pow(MaxErr/err, 1./5.))
        data.Copy(rk.k1, k7) // FSAL
    } else {
        // undo bad step
        //util.Println("Bad step at t=", t0, ", err=", err)
        util.Assert(FixDt == 0)
        Time = t0
        data.Copy(m, m0)
        NUndone++
        adaptDt(math.Pow(MaxErr/err, 1./6.))
    }
}
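// For reference: the coefficients above are the Dormand–Prince 5(4) pair
// (the method behind "RK45"/ode45). Butcher tableau, with b the 5th-order
// weights actually advanced and b* the embedded 4th-order weights used
// only in the error estimate:
//
//       0  |
//      1/5 | 1/5
//     3/10 | 3/40        9/40
//      4/5 | 44/45      -56/15      32/9
//      8/9 | 19372/6561 -25360/2187 64448/6561 -212/729
//       1  | 9017/3168  -355/33     46732/5247  49/176  -5103/18656
//       1  | 35/384      0          500/1113   125/192  -2187/6784    11/84
//     -----+-----------------------------------------------------------------------
//       b  | 35/384      0          500/1113   125/192  -2187/6784    11/84    0
//       b* | 5179/57600  0          7571/16695 393/640  -92097/339200 187/2100 1/40
//
// Stage 7 is evaluated at the accepted 5th-order solution, so k7 doubles
// as k1 of the next step (FSAL) and is copied into rk.k1 on success.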