func (d *dotProduct) EvalTo(dst *data.Slice) { A := ValueOf(d.a) defer cuda.Recycle(A) B := ValueOf(d.b) defer cuda.Recycle(B) cuda.Zero(dst) cuda.AddDotProduct(dst, 1, A, B) }
// Step takes one fixed time step; can be used as solver.Step.
// Despite the type name, the half-dt torque evaluation below makes this an
// implicit-midpoint-style step rather than plain backward Euler.
// Requires MaxErr > 0 and a fixed time step (FixDt != 0).
func (s *BackwardEuler) Step() {
	util.AssertMsg(MaxErr > 0, "Backward euler solver requires MaxErr > 0")

	t0 := Time

	y := M.Buffer()
	// keep a copy of the starting magnetization as the base for each iteration
	y0 := cuda.Buffer(VECTOR, y.Size())
	defer cuda.Recycle(y0)
	data.Copy(y0, y)

	dy0 := cuda.Buffer(VECTOR, y.Size())
	defer cuda.Recycle(dy0)
	// dy1 persists across steps so the previous torque can seed the predictor
	if s.dy1 == nil {
		s.dy1 = cuda.Buffer(VECTOR, y.Size())
	}
	dy1 := s.dy1

	Dt_si = FixDt
	dt := float32(Dt_si * GammaLL)
	util.AssertMsg(dt > 0, "Backward Euler solver requires fixed time step > 0")

	// First guess
	Time = t0 + 0.5*Dt_si // 0.5 dt makes it implicit midpoint method

	// with temperature, previous torque cannot be used as predictor
	if Temp.isZero() {
		cuda.Madd2(y, y0, dy1, 1, dt) // predictor euler step with previous torque
		M.normalize()
	}

	torqueFn(dy0)
	cuda.Madd2(y, y0, dy0, 1, dt) // y = y0 + dt * dy
	M.normalize()

	// One iteration
	torqueFn(dy1)
	cuda.Madd2(y, y0, dy1, 1, dt) // y = y0 + dt * dy1
	M.normalize()

	Time = t0 + Dt_si

	// error estimate from the change in torque between the two evaluations
	err := cuda.MaxVecDiff(dy0, dy1) * float64(dt)

	// adjust next time step
	// NOTE(review): the adaptive accept/reject logic below is commented out,
	// so every step is accepted unconditionally.
	//if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
	// step OK
	NSteps++
	setLastErr(err)
	setMaxTorque(dy1)
	//} else {
	// undo bad step
	//	util.Assert(FixDt == 0)
	//	Time = t0
	//	data.Copy(y, y0)
	//	NUndone++
	//}
}
// vector dot product func dot(a, b outputField) float64 { A, recyA := a.Slice() if recyA { defer cuda.Recycle(A) } B, recyB := b.Slice() if recyB { defer cuda.Recycle(B) } return float64(cuda.Dot(A, B)) }
// Step performs one energy-minimization descent step. The step size mini.h
// is updated by alternating between the two ratios <dm,dm>/<dm,dk> and
// <dm,dk>/<dk,dk> (a Barzilai-Borwein-style scheme — presumably; confirm
// against the minimizer's reference). Time does not advance; NSteps does.
func (mini *Minimizer) Step() {
	m := M.Buffer()
	size := m.Size()
	k := mini.k
	h := mini.h

	// save original magnetization
	m0 := cuda.Buffer(3, size)
	defer cuda.Recycle(m0)
	data.Copy(m0, m)

	// make descent
	cuda.Minimize(m, m0, k, h)

	// calculate new torque for next step
	k0 := cuda.Buffer(3, size)
	defer cuda.Recycle(k0)
	data.Copy(k0, k)
	torqueFn(k)
	setMaxTorque(k) // report to user

	// just to make the following readable
	dm := m0
	dk := k0

	// calculate step difference of m and k
	cuda.Madd2(dm, m, m0, 1., -1.)
	cuda.Madd2(dk, k, k0, -1., 1.) // reversed due to LLNoPrecess sign

	// get maxdiff and add to list
	max_dm := cuda.MaxVecNorm(dm)
	mini.lastDm.Add(max_dm)
	setLastErr(mini.lastDm.Max()) // report maxDm to user as LastErr

	// adjust next time step
	var nom, div float32
	if NSteps%2 == 0 {
		nom = cuda.Dot(dm, dm)
		div = cuda.Dot(dm, dk)
	} else {
		nom = cuda.Dot(dm, dk)
		div = cuda.Dot(dk, dk)
	}
	if div != 0. {
		mini.h = nom / div
	} else { // in case of division by zero
		mini.h = 1e-4
	}

	M.normalize()

	// as a convention, time does not advance during relax
	NSteps++
}
func (d *dotProduct) Slice() (*data.Slice, bool) { slice := cuda.Buffer(d.NComp(), d.Mesh().Size()) cuda.Zero(slice) A, r := d.a.Slice() if r { defer cuda.Recycle(A) } B, r := d.b.Slice() if r { defer cuda.Recycle(B) } cuda.AddDotProduct(slice, 1, A, B) return slice, true }
// returns a function that adds to dst the energy density: // prefactor * dot (M_full, field) func makeEdensAdder(field outputField, prefactor float64) func(*data.Slice) { return func(dst *data.Slice) { B, r1 := field.Slice() if r1 { defer cuda.Recycle(B) } m, r2 := M_full.Slice() if r2 { defer cuda.Recycle(m) } factor := float32(prefactor) cuda.AddDotProduct(dst, factor, B, m) } }
// Euler method, can be used as solver.Step.
// With FixDt == 0 the time step is chosen so that the maximum magnetization
// change per step is approximately MaxErr.
func (_ *Euler) Step() {
	y := M.Buffer()
	dy0 := cuda.Buffer(VECTOR, y.Size())
	defer cuda.Recycle(dy0)

	torqueFn(dy0)
	setMaxTorque(dy0)

	// Adaptive time stepping: treat MaxErr as the maximum magnetization delta
	// (proportional to the error, but an overestimation for sure)
	var dt float32
	if FixDt != 0 {
		Dt_si = FixDt
		dt = float32(Dt_si * GammaLL)
	} else {
		// NOTE(review): if LastTorque == 0 this yields dt = +Inf, which still
		// passes the dt > 0 assert below — confirm this cannot occur here.
		dt = float32(MaxErr / LastTorque)
		Dt_si = float64(dt) / GammaLL
	}
	util.AssertMsg(dt > 0, "Euler solver requires fixed time step > 0")
	setLastErr(float64(dt) * LastTorque)

	cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy
	M.normalize()
	Time += Dt_si
	NSteps++
}
// Gets func GetMaxTorque() float64 { torque, recycle := Torque.Slice() if recycle { defer cuda.Recycle(torque) } return cuda.MaxVecNorm(torque) }
// average of quantity over universe func qAverageUniverse(q Quantity) []float64 { s, recycle := q.Slice() if recycle { defer cuda.Recycle(s) } return sAverageUniverse(s) }
func (g *geom) average() []float64 { s, r := g.Slice() if r { defer cuda.Recycle(s) } return sAverageUniverse(s) }
// AddCustomField evaluates the user-defined custom field terms // and adds the result to dst. func AddCustomField(dst *data.Slice) { for _, term := range customTerms { buf := ValueOf(term) cuda.Add(dst, dst, buf) cuda.Recycle(buf) } }
func (d *pointwiseDiv) EvalTo(dst *data.Slice) { a := ValueOf(d.a) defer cuda.Recycle(a) b := ValueOf(d.b) defer cuda.Recycle(b) switch { case a.NComp() == b.NComp(): divNN(dst, a, b) // vector*vector, scalar*scalar case b.NComp() == 1: divN1(dst, a, b) default: panic(fmt.Sprintf("Cannot point-wise divide %v components by %v components", a.NComp(), b.NComp())) } }
func (r *Regions) average() []float64 { s, recycle := r.Slice() if recycle { defer cuda.Recycle(s) } return sAverageUniverse(s) }
func GetMaxAngle() float64 { s, recycle := SpinAngle.Slice() if recycle { defer cuda.Recycle(s) } return float64(cuda.MaxAbs(s)) // just a max would be fine, but not currently implemented }
// average of quantity over universe func qAverageUniverse(q outputField) []float64 { s, recycle := q.Slice() if recycle { defer cuda.Recycle(s) } return sAverageUniverse(s) }
// shift shifts the geometry mask by dx cells along X. Newly exposed edge
// cells are first filled with 1 (inside the geometry), then re-evaluated
// against the shape function over the dirty X range only.
func (g *geom) shift(dx int) {
	// empty mask, nothing to do
	if g == nil || g.buffer.IsNil() {
		return
	}

	// allocated mask: shift
	s := g.buffer
	s2 := cuda.Buffer(1, g.Mesh().Size())
	defer cuda.Recycle(s2)
	newv := float32(1) // initially fill edges with 1's
	cuda.ShiftX(s2, s, dx, newv, newv)
	data.Copy(s, s2)

	n := Mesh().Size()
	x1, x2 := shiftDirtyRange(dx)

	// re-evaluate the shape only for cells in the shifted-in ("dirty") range
	for iz := 0; iz < n[Z]; iz++ {
		for iy := 0; iy < n[Y]; iy++ {
			for ix := x1; ix < x2; ix++ {
				r := Index2Coord(ix, iy, iz) // includes shift
				if !g.shape(r[X], r[Y], r[Z]) {
					cuda.SetCell(g.buffer, 0, ix, iy, iz, 0) // a bit slowish, but hardly reached
				}
			}
		}
	}
}
func AddAnisotropyEnergyDensity(dst *data.Slice) { haveUnixial := ku1_red.nonZero() || ku2_red.nonZero() haveCubic := kc1_red.nonZero() || kc2_red.nonZero() || kc3_red.nonZero() if !haveUnixial && !haveCubic { return } buf := cuda.Buffer(B_anis.NComp(), B_anis.Mesh().Size()) defer cuda.Recycle(buf) // unnormalized magnetization: Mf, r := M_full.Slice() if r { defer cuda.Recycle(Mf) } if haveUnixial { // 1st cuda.Zero(buf) cuda.AddUniaxialAnisotropy(buf, M.Buffer(), ku1_red.gpuLUT1(), zero.gpuLUT1(), AnisU.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./2., buf, Mf) // 2nd cuda.Zero(buf) cuda.AddUniaxialAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), ku2_red.gpuLUT1(), AnisU.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./4., buf, Mf) } if haveCubic { // 1st cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), kc1_red.gpuLUT1(), zero.gpuLUT1(), zero.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./4., buf, Mf) // 2nd cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), kc2_red.gpuLUT1(), zero.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./6., buf, Mf) // 3nd cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), zero.gpuLUT1(), kc3_red.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./8., buf, Mf) } }
// Returns anisotropy energy in joules. func GetAnisotropyEnergy() float64 { buf := cuda.Buffer(1, Edens_anis.Mesh().Size()) defer cuda.Recycle(buf) cuda.Zero(buf) AddAnisotropyEnergyDensity(buf) return cellVolume() * float64(cuda.Sum(buf)) }
func AddAnisotropyEnergyDensity(dst *data.Slice) { haveUnixial := Ku1.nonZero() || Ku2.nonZero() haveCubic := Kc1.nonZero() || Kc2.nonZero() || Kc3.nonZero() if !haveUnixial && !haveCubic { return } buf := cuda.Buffer(B_anis.NComp(), B_anis.Mesh().Size()) defer cuda.Recycle(buf) // unnormalized magnetization: Mf, r := M_full.Slice() if r { defer cuda.Recycle(Mf) } if haveUnixial { // 1st cuda.Zero(buf) addUniaxialAnisotropyFrom(buf, M, Msat, Ku1, sZero, AnisU) cuda.AddDotProduct(dst, -1./2., buf, Mf) // 2nd cuda.Zero(buf) addUniaxialAnisotropyFrom(buf, M, Msat, sZero, Ku2, AnisU) cuda.AddDotProduct(dst, -1./4., buf, Mf) } if haveCubic { // 1st cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, Kc1, sZero, sZero, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./4., buf, Mf) // 2nd cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, sZero, Kc2, sZero, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./6., buf, Mf) // 3nd cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, sZero, sZero, Kc3, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./8., buf, Mf) } }
// AddSTTorque adds the current spin transfer torque to dst: the Zhang-Li
// torque (unless disabled) and the Slonczewski torque (unless disabled or
// no fixed layer is set). No-op when the current density J is zero.
func AddSTTorque(dst *data.Slice) {
	if J.isZero() {
		return
	}
	util.AssertMsg(!Pol.isZero(), "spin polarization should not be 0")

	jspin, rec := J.Slice()
	if rec {
		defer cuda.Recycle(jspin)
	}
	fl, rec := FixedLayer.Slice()
	if rec {
		defer cuda.Recycle(fl)
	}

	if !DisableZhangLiTorque {
		// each MSlice must be recycled after the kernel call
		msat := Msat.MSlice()
		defer msat.Recycle()
		j := J.MSlice()
		defer j.Recycle()
		alpha := Alpha.MSlice()
		defer alpha.Recycle()
		xi := Xi.MSlice()
		defer xi.Recycle()
		pol := Pol.MSlice()
		defer pol.Recycle()
		cuda.AddZhangLiTorque(dst, M.Buffer(), msat, j, alpha, xi, pol, Mesh())
	}

	if !DisableSlonczewskiTorque && !FixedLayer.isZero() {
		msat := Msat.MSlice()
		defer msat.Recycle()
		j := J.MSlice()
		defer j.Recycle()
		fixedP := FixedLayer.MSlice()
		defer fixedP.Recycle()
		alpha := Alpha.MSlice()
		defer alpha.Recycle()
		pol := Pol.MSlice()
		defer pol.Recycle()
		lambda := Lambda.MSlice()
		defer lambda.Recycle()
		epsPrime := EpsilonPrime.MSlice()
		defer epsPrime.Recycle()
		cuda.AddSlonczewskiTorque2(dst, M.Buffer(), msat, j, fixedP, alpha, pol, lambda, epsPrime, Mesh())
	}
}
func (q *oneReg) average() []float64 { slice, r := q.Slice() if r { defer cuda.Recycle(slice) } avg := sAverageUniverse(slice) sDiv(avg, regions.volume(q.region)) return avg }
func GetTopologicalCharge() float64 { s, recycle := Ext_TopologicalChargeDensity.Slice() if recycle { defer cuda.Recycle(s) } c := Mesh().CellSize() N := Mesh().Size() return (0.25 * c[X] * c[Y] / math.Pi / float64(N[Z])) * float64(cuda.Sum(s)) }
func (d *pointwiseMul) EvalTo(dst *data.Slice) { cuda.Zero(dst) a := ValueOf(d.a) defer cuda.Recycle(a) b := ValueOf(d.b) defer cuda.Recycle(b) switch { case a.NComp() == b.NComp(): mulNN(dst, a, b) // vector*vector, scalar*scalar case a.NComp() == 1: mul1N(dst, a, b) case b.NComp() == 1: mul1N(dst, b, a) default: panic(fmt.Sprintf("Cannot point-wise multiply %v components by %v components", a.NComp(), b.NComp())) } }
func shiftMag(m *data.Slice, dx int) { m2 := cuda.Buffer(1, m.Size()) defer cuda.Recycle(m2) for c := 0; c < m.NComp(); c++ { comp := m.Comp(c) cuda.ShiftX(m2, comp, dx, float32(ShiftMagL[c]), float32(ShiftMagR[c])) data.Copy(comp, m2) // str0 ? } }
// returns a new slice equal to q in the given region, 0 outside. func (q *oneReg) Slice() (*data.Slice, bool) { src, r := q.parent.Slice() if r { defer cuda.Recycle(src) } out := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.RegionSelect(out, src, regions.Gpu(), byte(q.region)) return out, true }
func (q *cropped) Slice() (*data.Slice, bool) { src, r := q.parent.Slice() if r { defer cuda.Recycle(src) } dst := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.Crop(dst, src, q.x1, q.y1, q.z1) return dst, true }
// update refreshes the cached thermal noise field b.noise for the current
// solver step. The noise is regenerated at most once per (NSteps, Dt_si)
// pair so it stays constant during a time step. Requires FixDt != 0 when
// the temperature is nonzero.
func (b *thermField) update() {
	// we need to fix the time step here because solver will not yet have done it before the first step.
	// FixDt as an lvalue that sets Dt_si on change might be cleaner.
	if FixDt != 0 {
		Dt_si = FixDt
	}

	// lazily create the random number generator
	if b.generator == 0 {
		b.generator = curand.CreateGenerator(curand.PSEUDO_DEFAULT)
		b.generator.SetSeed(b.seed)
	}
	if b.noise == nil {
		b.noise = cuda.NewSlice(b.NComp(), b.Mesh().Size())
		// when noise was (re-)allocated it's invalid for sure.
		B_therm.step = -1
		B_therm.dt = -1
	}

	// zero temperature: zero field, cache as up to date
	if Temp.isZero() {
		cuda.Memset(b.noise, 0, 0, 0)
		b.step = NSteps
		b.dt = Dt_si
		return
	}

	// keep constant during time step
	if NSteps == b.step && Dt_si == b.dt {
		return
	}

	if FixDt == 0 {
		util.Fatal("Finite temperature requires fixed time step. Set FixDt != 0.")
	}

	N := Mesh().NCell()
	k2_VgammaDt := 2 * mag.Kb / (GammaLL * cellVolume() * Dt_si)
	noise := cuda.Buffer(1, Mesh().Size())
	defer cuda.Recycle(noise)

	const mean = 0
	const stddev = 1
	dst := b.noise
	ms := Msat.MSlice()
	defer ms.Recycle()
	temp := Temp.MSlice()
	defer temp.Recycle()
	alpha := Alpha.MSlice()
	defer alpha.Recycle()
	// generate standard-normal noise per component, then scale it by the
	// material parameters into the thermal field
	for i := 0; i < 3; i++ {
		b.generator.GenerateNormal(uintptr(noise.DevPtr(0)), int64(N), mean, stddev)
		cuda.SetTemperature(dst.Comp(i), noise, k2_VgammaDt, ms, temp, alpha)
	}

	b.step = NSteps
	b.dt = Dt_si
}
// Save image once, with auto file name func Snapshot(q outputField) { fname := fmt.Sprintf(OD()+FilenameFormat+"."+SnapshotFormat, q.Name(), autonum[q]) s, r := q.Slice() if r { defer cuda.Recycle(s) } data := s.HostCopy() // must be copy (asyncio) queOutput(func() { snapshot_sync(fname, data) }) autonum[q]++ }
func SetMFM(dst *data.Slice) { buf := cuda.Buffer(3, Mesh().Size()) defer cuda.Recycle(buf) if mfmconv_ == nil { reinitmfmconv() } mfmconv_.Exec(buf, M.Buffer(), geometry.Gpu(), Bsat.gpuLUT1(), regions.Gpu()) cuda.Madd3(dst, buf.Comp(0), buf.Comp(1), buf.Comp(2), 1, 1, 1) }
// Adaptive Heun method, can be used as solver.Step.
// Takes a predictor Euler step, evaluates the torque at the new time, and
// applies the trapezoidal correction. On too-large error the step is undone
// and the step size reduced (unless FixDt is set or Dt_si hit MinDt).
func (_ *Heun) Step() {
	y := M.Buffer()
	dy0 := cuda.Buffer(VECTOR, y.Size())
	defer cuda.Recycle(dy0)

	if FixDt != 0 {
		Dt_si = FixDt
	}
	dt := float32(Dt_si * GammaLL)
	util.Assert(dt > 0)

	// stage 1
	torqueFn(dy0)
	cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy

	// stage 2
	dy := cuda.Buffer(3, y.Size())
	defer cuda.Recycle(dy)
	Time += Dt_si
	torqueFn(dy)

	// error estimate: torque difference between the two stages, scaled by dt
	err := cuda.MaxVecDiff(dy0, dy) * float64(dt)

	// adjust next time step
	if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
		// step OK: apply the trapezoidal correction on top of the Euler predictor
		cuda.Madd3(y, y, dy, dy0, 1, 0.5*dt, -0.5*dt)
		M.normalize()
		NSteps++
		adaptDt(math.Pow(MaxErr/err, 1./2.))
		setLastErr(err)
		setMaxTorque(dy)
	} else {
		// undo bad step
		util.Assert(FixDt == 0)
		Time -= Dt_si
		cuda.Madd2(y, y, dy0, 1, -dt)
		NUndone++
		adaptDt(math.Pow(MaxErr/err, 1./3.))
	}
}