// Euler method, can be used as solver.Step. func (s *BackwardEuler) Step() { util.AssertMsg(MaxErr > 0, "Backward euler solver requires MaxErr > 0") t0 := Time y := M.Buffer() y0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(y0) data.Copy(y0, y) dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) if s.dy1 == nil { s.dy1 = cuda.Buffer(VECTOR, y.Size()) } dy1 := s.dy1 Dt_si = FixDt dt := float32(Dt_si * GammaLL) util.AssertMsg(dt > 0, "Backward Euler solver requires fixed time step > 0") // Fist guess Time = t0 + 0.5*Dt_si // 0.5 dt makes it implicit midpoint method // with temperature, previous torque cannot be used as predictor if Temp.isZero() { cuda.Madd2(y, y0, dy1, 1, dt) // predictor euler step with previous torque M.normalize() } torqueFn(dy0) cuda.Madd2(y, y0, dy0, 1, dt) // y = y0 + dt * dy M.normalize() // One iteration torqueFn(dy1) cuda.Madd2(y, y0, dy1, 1, dt) // y = y0 + dt * dy1 M.normalize() Time = t0 + Dt_si err := cuda.MaxVecDiff(dy0, dy1) * float64(dt) // adjust next time step //if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK NSteps++ setLastErr(err) setMaxTorque(dy1) //} else { // undo bad step // util.Assert(FixDt == 0) // Time = t0 // data.Copy(y, y0) // NUndone++ //} }
func (mini *Minimizer) Step() { m := M.Buffer() size := m.Size() k := mini.k h := mini.h // save original magnetization m0 := cuda.Buffer(3, size) defer cuda.Recycle(m0) data.Copy(m0, m) // make descent cuda.Minimize(m, m0, k, h) // calculate new torque for next step k0 := cuda.Buffer(3, size) defer cuda.Recycle(k0) data.Copy(k0, k) torqueFn(k) setMaxTorque(k) // report to user // just to make the following readable dm := m0 dk := k0 // calculate step difference of m and k cuda.Madd2(dm, m, m0, 1., -1.) cuda.Madd2(dk, k, k0, -1., 1.) // reversed due to LLNoPrecess sign // get maxdiff and add to list max_dm := cuda.MaxVecNorm(dm) mini.lastDm.Add(max_dm) setLastErr(mini.lastDm.Max()) // report maxDm to user as LastErr // adjust next time step var nom, div float32 if NSteps%2 == 0 { nom = cuda.Dot(dm, dm) div = cuda.Dot(dm, dk) } else { nom = cuda.Dot(dm, dk) div = cuda.Dot(dk, dk) } if div != 0. { mini.h = nom / div } else { // in case of division by zero mini.h = 1e-4 } M.normalize() // as a convention, time does not advance during relax NSteps++ }
func (g *geom) shift(dx int) { // empty mask, nothing to do if g == nil || g.buffer.IsNil() { return } // allocated mask: shift s := g.buffer s2 := cuda.Buffer(1, g.Mesh().Size()) defer cuda.Recycle(s2) newv := float32(1) // initially fill edges with 1's cuda.ShiftX(s2, s, dx, newv, newv) data.Copy(s, s2) n := Mesh().Size() x1, x2 := shiftDirtyRange(dx) for iz := 0; iz < n[Z]; iz++ { for iy := 0; iy < n[Y]; iy++ { for ix := x1; ix < x2; ix++ { r := Index2Coord(ix, iy, iz) // includes shift if !g.shape(r[X], r[Y], r[Z]) { cuda.SetCell(g.buffer, 0, ix, iy, iz, 0) // a bit slowish, but hardly reached } } } } }
// Euler method, can be used as solver.Step. func (_ *Euler) Step() { y := M.Buffer() dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) torqueFn(dy0) setMaxTorque(dy0) // Adaptive time stepping: treat MaxErr as the maximum magnetization delta // (proportional to the error, but an overestimation for sure) var dt float32 if FixDt != 0 { Dt_si = FixDt dt = float32(Dt_si * GammaLL) } else { dt = float32(MaxErr / LastTorque) Dt_si = float64(dt) / GammaLL } util.AssertMsg(dt > 0, "Euler solver requires fixed time step > 0") setLastErr(float64(dt) * LastTorque) cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy M.normalize() Time += Dt_si NSteps++ }
// Returns anisotropy energy in joules. func GetAnisotropyEnergy() float64 { buf := cuda.Buffer(1, Edens_anis.Mesh().Size()) defer cuda.Recycle(buf) cuda.Zero(buf) AddAnisotropyEnergyDensity(buf) return cellVolume() * float64(cuda.Sum(buf)) }
// uncompress the table to a full array with parameter values per cell. func (p *lut) Slice() (*data.Slice, bool) { gpu := p.gpuLUT() b := cuda.Buffer(p.NComp(), Mesh().Size()) for c := 0; c < p.NComp(); c++ { cuda.RegionDecode(b.Comp(c), cuda.LUTPtr(gpu[c]), regions.Gpu()) } return b, true }
// returns a new slice equal to q in the given region, 0 outside. func (q *oneReg) Slice() (*data.Slice, bool) { src, r := q.parent.Slice() if r { defer cuda.Recycle(src) } out := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.RegionSelect(out, src, regions.Gpu(), byte(q.region)) return out, true }
func shiftMag(m *data.Slice, dx int) { m2 := cuda.Buffer(1, m.Size()) defer cuda.Recycle(m2) for c := 0; c < m.NComp(); c++ { comp := m.Comp(c) cuda.ShiftX(m2, comp, dx, float32(ShiftMagL[c]), float32(ShiftMagR[c])) data.Copy(comp, m2) // str0 ? } }
func (q *cropped) Slice() (*data.Slice, bool) { src, r := q.parent.Slice() if r { defer cuda.Recycle(src) } dst := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.Crop(dst, src, q.x1, q.y1, q.z1) return dst, true }
func (b *thermField) update() { // we need to fix the time step here because solver will not yet have done it before the first step. // FixDt as an lvalue that sets Dt_si on change might be cleaner. if FixDt != 0 { Dt_si = FixDt } if b.generator == 0 { b.generator = curand.CreateGenerator(curand.PSEUDO_DEFAULT) b.generator.SetSeed(b.seed) } if b.noise == nil { b.noise = cuda.NewSlice(b.NComp(), b.Mesh().Size()) // when noise was (re-)allocated it's invalid for sure. B_therm.step = -1 B_therm.dt = -1 } if Temp.isZero() { cuda.Memset(b.noise, 0, 0, 0) b.step = NSteps b.dt = Dt_si return } // keep constant during time step if NSteps == b.step && Dt_si == b.dt { return } if FixDt == 0 { util.Fatal("Finite temperature requires fixed time step. Set FixDt != 0.") } N := Mesh().NCell() k2_VgammaDt := 2 * mag.Kb / (GammaLL * cellVolume() * Dt_si) noise := cuda.Buffer(1, Mesh().Size()) defer cuda.Recycle(noise) const mean = 0 const stddev = 1 dst := b.noise ms := Msat.MSlice() defer ms.Recycle() temp := Temp.MSlice() defer temp.Recycle() alpha := Alpha.MSlice() defer alpha.Recycle() for i := 0; i < 3; i++ { b.generator.GenerateNormal(uintptr(noise.DevPtr(0)), int64(N), mean, stddev) cuda.SetTemperature(dst.Comp(i), noise, k2_VgammaDt, ms, temp, alpha) } b.step = NSteps b.dt = Dt_si }
func (g *geom) Slice() (*data.Slice, bool) { s := g.Gpu() if s.IsNil() { s := cuda.Buffer(g.NComp(), g.Mesh().Size()) cuda.Memset(s, 1) return s, true } else { return s, false } }
func SetMFM(dst *data.Slice) { buf := cuda.Buffer(3, Mesh().Size()) defer cuda.Recycle(buf) if mfmconv_ == nil { reinitmfmconv() } mfmconv_.Exec(buf, M.Buffer(), geometry.Gpu(), Bsat.gpuLUT1(), regions.Gpu()) cuda.Madd3(dst, buf.Comp(0), buf.Comp(1), buf.Comp(2), 1, 1, 1) }
// Adaptive Heun method, can be used as solver.Step func (_ *Heun) Step() { y := M.Buffer() dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) if FixDt != 0 { Dt_si = FixDt } dt := float32(Dt_si * GammaLL) util.Assert(dt > 0) // stage 1 torqueFn(dy0) cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy // stage 2 dy := cuda.Buffer(3, y.Size()) defer cuda.Recycle(dy) Time += Dt_si torqueFn(dy) err := cuda.MaxVecDiff(dy0, dy) * float64(dt) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK cuda.Madd3(y, y, dy, dy0, 1, 0.5*dt, -0.5*dt) M.normalize() NSteps++ adaptDt(math.Pow(MaxErr/err, 1./2.)) setLastErr(err) setMaxTorque(dy) } else { // undo bad step util.Assert(FixDt == 0) Time -= Dt_si cuda.Madd2(y, y, dy0, 1, -dt) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./3.)) } }
func SetMFM(dst *data.Slice) { buf := cuda.Buffer(3, Mesh().Size()) defer cuda.Recycle(buf) if mfmconv_ == nil { reinitmfmconv() } msat := Msat.MSlice() defer msat.Recycle() mfmconv_.Exec(buf, M.Buffer(), geometry.Gpu(), msat) cuda.Madd3(dst, buf.Comp(0), buf.Comp(1), buf.Comp(2), 1, 1, 1) }
func (d *dotProduct) Slice() (*data.Slice, bool) { slice := cuda.Buffer(d.NComp(), d.Mesh().Size()) cuda.Zero(slice) A, r := d.a.Slice() if r { defer cuda.Recycle(A) } B, r := d.b.Slice() if r { defer cuda.Recycle(B) } cuda.AddDotProduct(slice, 1, A, B) return slice, true }
func AddAnisotropyEnergyDensity(dst *data.Slice) { haveUnixial := Ku1.nonZero() || Ku2.nonZero() haveCubic := Kc1.nonZero() || Kc2.nonZero() || Kc3.nonZero() if !haveUnixial && !haveCubic { return } buf := cuda.Buffer(B_anis.NComp(), B_anis.Mesh().Size()) defer cuda.Recycle(buf) // unnormalized magnetization: Mf, r := M_full.Slice() if r { defer cuda.Recycle(Mf) } if haveUnixial { // 1st cuda.Zero(buf) addUniaxialAnisotropyFrom(buf, M, Msat, Ku1, sZero, AnisU) cuda.AddDotProduct(dst, -1./2., buf, Mf) // 2nd cuda.Zero(buf) addUniaxialAnisotropyFrom(buf, M, Msat, sZero, Ku2, AnisU) cuda.AddDotProduct(dst, -1./4., buf, Mf) } if haveCubic { // 1st cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, Kc1, sZero, sZero, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./4., buf, Mf) // 2nd cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, sZero, Kc2, sZero, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./6., buf, Mf) // 3nd cuda.Zero(buf) addCubicAnisotropyFrom(buf, M, Msat, sZero, sZero, Kc3, AnisC1, AnisC2) cuda.AddDotProduct(dst, -1./8., buf, Mf) } }
func AddAnisotropyEnergyDensity(dst *data.Slice) { haveUnixial := ku1_red.nonZero() || ku2_red.nonZero() haveCubic := kc1_red.nonZero() || kc2_red.nonZero() || kc3_red.nonZero() if !haveUnixial && !haveCubic { return } buf := cuda.Buffer(B_anis.NComp(), B_anis.Mesh().Size()) defer cuda.Recycle(buf) // unnormalized magnetization: Mf, r := M_full.Slice() if r { defer cuda.Recycle(Mf) } if haveUnixial { // 1st cuda.Zero(buf) cuda.AddUniaxialAnisotropy(buf, M.Buffer(), ku1_red.gpuLUT1(), zero.gpuLUT1(), AnisU.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./2., buf, Mf) // 2nd cuda.Zero(buf) cuda.AddUniaxialAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), ku2_red.gpuLUT1(), AnisU.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./4., buf, Mf) } if haveCubic { // 1st cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), kc1_red.gpuLUT1(), zero.gpuLUT1(), zero.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./4., buf, Mf) // 2nd cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), kc2_red.gpuLUT1(), zero.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./6., buf, Mf) // 3nd cuda.Zero(buf) cuda.AddCubicAnisotropy(buf, M.Buffer(), zero.gpuLUT1(), zero.gpuLUT1(), kc3_red.gpuLUT1(), AnisC1.gpuLUT(), AnisC2.gpuLUT(), regions.Gpu()) cuda.AddDotProduct(dst, -1./8., buf, Mf) } }
func Minimize() { SanityCheck() // Save the settings we are changing... prevType := solvertype prevFixDt := FixDt prevPrecess := Precess t0 := Time relaxing = true // disable temperature noise // ...to restore them later defer func() { SetSolver(prevType) FixDt = prevFixDt Precess = prevPrecess Time = t0 relaxing = false }() Precess = false // disable precession for torque calculation // remove previous stepper if stepper != nil { stepper.Free() } // set stepper to the minimizer size := M.Buffer().Size() mini := Minimizer{ h: 1e-4, k: cuda.Buffer(3, size), lastDm: FifoRing(DmSamples)} stepper = &mini // calculate initial torque torqueFn(mini.k) cond := func() bool { return (mini.lastDm.count < DmSamples || mini.lastDm.Max() > StopMaxDm) } RunWhile(cond) pause = true }
// Sets dst to the demag field, but cells where NoDemagSpins != 0 do not generate nor recieve field. func setMaskedDemagField(dst *data.Slice) { // No-demag spins: mask-out geometry with zeros where NoDemagSpins is set, // so these spins do not generate a field buf := cuda.Buffer(SCALAR, geometry.Gpu().Size()) // masked-out geometry defer cuda.Recycle(buf) // obtain a copy of the geometry mask, which we can overwrite geom, r := geometry.Slice() if r { defer cuda.Recycle(geom) } data.Copy(buf, geom) // mask-out cuda.ZeroMask(buf, NoDemagSpins.gpuLUT1(), regions.Gpu()) // convolution with masked-out cells. demagConv().Exec(dst, M.Buffer(), buf, Bsat.gpuLUT1(), regions.Gpu()) // After convolution, mask-out the field in the NoDemagSpins cells // so they don't feel the field generated by others. cuda.ZeroMask(dst, NoDemagSpins.gpuLUT1(), regions.Gpu()) }
// get the quantity, recycle will be true (q needs to be recycled) func (b *_setter) Slice() (q *data.Slice, recycle bool) { buffer := cuda.Buffer(b.NComp(), b.Mesh().Size()) b.Set(buffer) return buffer, true // must recycle }
// Get returns the regions as a slice of floats, so it can be output. func (r *Regions) Slice() (*data.Slice, bool) { buf := cuda.Buffer(1, r.Mesh().Size()) cuda.RegionDecode(buf, unitMap.gpuLUT1(), regions.Gpu()) return buf, true }
func (e *excitation) Slice() (*data.Slice, bool) { buf := cuda.Buffer(e.NComp(), e.Mesh().Size()) cuda.Zero(buf) e.AddTo(buf) return buf, true }
func ValueOf(q Q) *data.Slice { // TODO: check for Buffered() implementation buf := cuda.Buffer(q.NComp(), SizeOf(q)) q.EvalTo(buf) return buf }
func EvalScript(q Q) (*data.Slice, bool) { buf := cuda.Buffer(q.NComp(), SizeOf(q)) q.EvalTo(buf) return buf, true }
// Calculates and returns the quantity. // recycle is true: slice needs to be recycled. func (q *callbackQuant) Slice() (s *data.Slice, recycle bool) { buf := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.Zero(buf) q.call(buf) return buf, true }
// Calculates and returns the quantity. // recycle is true: slice needs to be recycled. func (q *_adder) Slice() (s *data.Slice, recycle bool) { buf := cuda.Buffer(q.NComp(), q.Mesh().Size()) q.Set(buf) return buf, true }
// Step performs one adaptive step of the embedded 3rd/2nd order Runge-Kutta
// solver (the coefficients match the Bogacki-Shampine tableau).
//
// The torque evaluated at the end of an accepted step is stored in rk.k1 and
// re-used as the first stage of the next step (FSAL). A step is accepted when
// the error estimate is below MaxErr, when Dt_si has reached MinDt, or when
// a fixed time step is used. A rejected step restores the magnetization and
// the time.
func (rk *RK23) Step() {
	m := M.Buffer()
	size := m.Size()

	if FixDt != 0 {
		Dt_si = FixDt
	}

	// upon resize: remove wrongly sized k1
	// NOTE(review): this runs before the nil check below, so it relies on
	// Size() being callable on a nil slice — confirm.
	if rk.k1.Size() != m.Size() {
		rk.Free()
	}

	// first step ever: one-time k1 init and eval
	if rk.k1 == nil {
		rk.k1 = cuda.NewSlice(3, size)
		torqueFn(rk.k1)
	}

	// FSAL cannot be used with temperature: re-evaluate k1 every step
	if !Temp.isZero() {
		torqueFn(rk.k1)
	}

	t0 := Time
	// backup magnetization
	m0 := cuda.Buffer(3, size)
	defer cuda.Recycle(m0)
	data.Copy(m0, m)

	k2, k3, k4 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
	defer cuda.Recycle(k2)
	defer cuda.Recycle(k3)
	defer cuda.Recycle(k4)

	h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

	// there is no explicit stage 1: k1 from previous step

	// stage 2
	Time = t0 + (1./2.)*Dt_si
	cuda.Madd2(m, m, rk.k1, 1, (1./2.)*h) // m = m*1 + k1*h/2
	M.normalize()
	torqueFn(k2)

	// stage 3
	Time = t0 + (3./4.)*Dt_si
	cuda.Madd2(m, m0, k2, 1, (3./4.)*h) // m = m0*1 + k2*h*3/4
	M.normalize()
	torqueFn(k3)

	// 3rd order solution
	madd4(m, m0, rk.k1, k2, k3, 1, (2./9.)*h, (1./3.)*h, (4./9.)*h)
	M.normalize()

	// error estimate
	Time = t0 + Dt_si
	torqueFn(k4)
	Err := k2 // re-use k2 as error: k2 is overwritten by the call below
	// difference of 3rd and 2nd order torque without explicitly storing them first
	madd4(Err, rk.k1, k2, k3, k4, (7./24.)-(2./9.), (1./4.)-(1./3.), (1./3.)-(4./9.), (1. / 8.))

	// determine error
	err := cuda.MaxVecNorm(Err) * float64(h)

	// adjust next time step
	if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
		// step OK
		setLastErr(err)
		setMaxTorque(k4)
		NSteps++
		Time = t0 + Dt_si
		adaptDt(math.Pow(MaxErr/err, 1./3.))
		data.Copy(rk.k1, k4) // FSAL: k4 becomes k1 of the next step
	} else {
		// undo bad step: restore time and magnetization, rk.k1 stays valid
		//util.Println("Bad step at t=", t0, ", err=", err)
		util.Assert(FixDt == 0)
		Time = t0
		data.Copy(m, m0)
		NUndone++
		adaptDt(math.Pow(MaxErr/err, 1./4.))
	}
}
// Calculates and returns the quantity. // recycle is true: slice needs to be recycled. func (q *fieldFunc) Slice() (s *data.Slice, recycle bool) { buf := cuda.Buffer(q.NComp(), q.Mesh().Size()) cuda.Zero(buf) q.f(buf) return buf, true }
// Step performs one adaptive step of the classical 4-stage Runge-Kutta
// solver.
//
// The error is estimated from the difference between the first and the last
// stage torque. A step is accepted when that estimate is below MaxErr, when
// Dt_si has reached MinDt, or when a fixed time step is used. A rejected step
// restores the magnetization and the time.
func (rk *RK4) Step() {
	m := M.Buffer()
	size := m.Size()

	if FixDt != 0 {
		Dt_si = FixDt
	}

	t0 := Time
	// backup magnetization
	m0 := cuda.Buffer(3, size)
	defer cuda.Recycle(m0)
	data.Copy(m0, m)

	k1, k2, k3, k4 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
	defer cuda.Recycle(k1)
	defer cuda.Recycle(k2)
	defer cuda.Recycle(k3)
	defer cuda.Recycle(k4)

	h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

	// stage 1
	torqueFn(k1)

	// stage 2
	Time = t0 + (1./2.)*Dt_si
	cuda.Madd2(m, m, k1, 1, (1./2.)*h) // m = m*1 + k1*h/2
	M.normalize()
	torqueFn(k2)

	// stage 3 (same time as stage 2: t0 + Dt_si/2)
	cuda.Madd2(m, m0, k2, 1, (1./2.)*h) // m = m0*1 + k2*h/2
	M.normalize()
	torqueFn(k3)

	// stage 4
	Time = t0 + Dt_si
	cuda.Madd2(m, m0, k3, 1, 1.*h) // m = m0*1 + k3*h
	M.normalize()
	torqueFn(k4)

	// error estimate: largest difference between first and last torque
	err := cuda.MaxVecDiff(k1, k4) * float64(h)

	// adjust next time step
	if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
		// step OK
		// 4th order solution
		madd5(m, m0, k1, k2, k3, k4, 1, (1./6.)*h, (1./3.)*h, (1./3.)*h, (1./6.)*h)
		M.normalize()
		NSteps++
		adaptDt(math.Pow(MaxErr/err, 1./4.))
		setLastErr(err)
		setMaxTorque(k4)
	} else {
		// undo bad step: restore time and magnetization
		//util.Println("Bad step at t=", t0, ", err=", err)
		util.Assert(FixDt == 0)
		Time = t0
		data.Copy(m, m0)
		NUndone++
		adaptDt(math.Pow(MaxErr/err, 1./5.))
	}
}
// Step performs one adaptive step of the embedded 5th/4th order Runge-Kutta
// solver (the coefficients match the Dormand-Prince tableau).
//
// The torque evaluated at the end of an accepted step is stored in rk.k1 and
// re-used as the first stage of the next step (FSAL). A step is accepted when
// the error estimate is below MaxErr, when Dt_si has reached MinDt, or when
// a fixed time step is used. A rejected step restores the magnetization and
// the time.
func (rk *RK45DP) Step() {
	m := M.Buffer()
	size := m.Size()

	if FixDt != 0 {
		Dt_si = FixDt
	}

	// upon resize: remove wrongly sized k1
	// NOTE(review): this runs before the nil check below, so it relies on
	// Size() being callable on a nil slice — confirm.
	if rk.k1.Size() != m.Size() {
		rk.Free()
	}

	// first step ever: one-time k1 init and eval
	if rk.k1 == nil {
		rk.k1 = cuda.NewSlice(3, size)
		torqueFn(rk.k1)
	}

	// FSAL cannot be used with finite temperature: re-evaluate k1 every step
	if !Temp.isZero() {
		torqueFn(rk.k1)
	}

	t0 := Time
	// backup magnetization
	m0 := cuda.Buffer(3, size)
	defer cuda.Recycle(m0)
	data.Copy(m0, m)

	k2, k3, k4, k5, k6 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
	defer cuda.Recycle(k2)
	defer cuda.Recycle(k3)
	defer cuda.Recycle(k4)
	defer cuda.Recycle(k5)
	defer cuda.Recycle(k6)
	// k2 will be re-used as k7

	h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

	// there is no explicit stage 1: k1 from previous step

	// stage 2
	Time = t0 + (1./5.)*Dt_si
	cuda.Madd2(m, m, rk.k1, 1, (1./5.)*h) // m = m*1 + k1*h/5
	M.normalize()
	torqueFn(k2)

	// stage 3
	Time = t0 + (3./10.)*Dt_si
	cuda.Madd3(m, m0, rk.k1, k2, 1, (3./40.)*h, (9./40.)*h)
	M.normalize()
	torqueFn(k3)

	// stage 4
	Time = t0 + (4./5.)*Dt_si
	madd4(m, m0, rk.k1, k2, k3, 1, (44./45.)*h, (-56./15.)*h, (32./9.)*h)
	M.normalize()
	torqueFn(k4)

	// stage 5
	Time = t0 + (8./9.)*Dt_si
	madd5(m, m0, rk.k1, k2, k3, k4, 1, (19372./6561.)*h, (-25360./2187.)*h, (64448./6561.)*h, (-212./729.)*h)
	M.normalize()
	torqueFn(k5)

	// stage 6
	Time = t0 + (1.)*Dt_si
	madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (9017./3168.)*h, (-355./33.)*h, (46732./5247.)*h, (49./176.)*h, (-5103./18656.)*h)
	M.normalize()
	torqueFn(k6)

	// stage 7: 5th order solution
	Time = t0 + (1.)*Dt_si
	// no k2: it does not enter the 5th order solution
	madd6(m, m0, rk.k1, k3, k4, k5, k6, 1, (35./384.)*h, (500./1113.)*h, (125./192.)*h, (-2187./6784.)*h, (11./84.)*h) // 5th
	M.normalize()
	k7 := k2     // re-use k2: its old content is no longer needed
	torqueFn(k7) // next torque if OK

	// error estimate, stored in a separate buffer (k3 is not re-used)
	Err := cuda.Buffer(3, size)
	defer cuda.Recycle(Err)
	// weights are the differences between the two embedded solutions
	madd6(Err, rk.k1, k3, k4, k5, k6, k7, (35./384.)-(5179./57600.), (500./1113.)-(7571./16695.), (125./192.)-(393./640.), (-2187./6784.)-(-92097./339200.), (11./84.)-(187./2100.), (0.)-(1./40.))

	// determine error
	err := cuda.MaxVecNorm(Err) * float64(h)

	// adjust next time step
	if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
		// step OK
		setLastErr(err)
		setMaxTorque(k7)
		NSteps++
		Time = t0 + Dt_si
		adaptDt(math.Pow(MaxErr/err, 1./5.))
		data.Copy(rk.k1, k7) // FSAL: k7 becomes k1 of the next step
	} else {
		// undo bad step: restore time and magnetization, rk.k1 stays valid
		//util.Println("Bad step at t=", t0, ", err=", err)
		util.Assert(FixDt == 0)
		Time = t0
		data.Copy(m, m0)
		NUndone++
		adaptDt(math.Pow(MaxErr/err, 1./6.))
	}
}