// Euler method, can be used as solver.Step. func (s *BackwardEuler) Step() { util.AssertMsg(MaxErr > 0, "Backward euler solver requires MaxErr > 0") t0 := Time y := M.Buffer() y0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(y0) data.Copy(y0, y) dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) if s.dy1 == nil { s.dy1 = cuda.Buffer(VECTOR, y.Size()) } dy1 := s.dy1 Dt_si = FixDt dt := float32(Dt_si * GammaLL) util.AssertMsg(dt > 0, "Backward Euler solver requires fixed time step > 0") // Fist guess Time = t0 + 0.5*Dt_si // 0.5 dt makes it implicit midpoint method // with temperature, previous torque cannot be used as predictor if Temp.isZero() { cuda.Madd2(y, y0, dy1, 1, dt) // predictor euler step with previous torque M.normalize() } torqueFn(dy0) cuda.Madd2(y, y0, dy0, 1, dt) // y = y0 + dt * dy M.normalize() // One iteration torqueFn(dy1) cuda.Madd2(y, y0, dy1, 1, dt) // y = y0 + dt * dy1 M.normalize() Time = t0 + Dt_si err := cuda.MaxVecDiff(dy0, dy1) * float64(dt) // adjust next time step //if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK NSteps++ setLastErr(err) setMaxTorque(dy1) //} else { // undo bad step // util.Assert(FixDt == 0) // Time = t0 // data.Copy(y, y0) // NUndone++ //} }
func (mini *Minimizer) Step() { m := M.Buffer() size := m.Size() k := mini.k h := mini.h // save original magnetization m0 := cuda.Buffer(3, size) defer cuda.Recycle(m0) data.Copy(m0, m) // make descent cuda.Minimize(m, m0, k, h) // calculate new torque for next step k0 := cuda.Buffer(3, size) defer cuda.Recycle(k0) data.Copy(k0, k) torqueFn(k) setMaxTorque(k) // report to user // just to make the following readable dm := m0 dk := k0 // calculate step difference of m and k cuda.Madd2(dm, m, m0, 1., -1.) cuda.Madd2(dk, k, k0, -1., 1.) // reversed due to LLNoPrecess sign // get maxdiff and add to list max_dm := cuda.MaxVecNorm(dm) mini.lastDm.Add(max_dm) setLastErr(mini.lastDm.Max()) // report maxDm to user as LastErr // adjust next time step var nom, div float32 if NSteps%2 == 0 { nom = cuda.Dot(dm, dm) div = cuda.Dot(dm, dk) } else { nom = cuda.Dot(dm, dk) div = cuda.Dot(dk, dk) } if div != 0. { mini.h = nom / div } else { // in case of division by zero mini.h = 1e-4 } M.normalize() // as a convention, time does not advance during relax NSteps++ }
// Euler method, can be used as solver.Step. func (_ *Euler) Step() { y := M.Buffer() dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) torqueFn(dy0) setMaxTorque(dy0) // Adaptive time stepping: treat MaxErr as the maximum magnetization delta // (proportional to the error, but an overestimation for sure) var dt float32 if FixDt != 0 { Dt_si = FixDt dt = float32(Dt_si * GammaLL) } else { dt = float32(MaxErr / LastTorque) Dt_si = float64(dt) / GammaLL } util.AssertMsg(dt > 0, "Euler solver requires fixed time step > 0") setLastErr(float64(dt) * LastTorque) cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy M.normalize() Time += Dt_si NSteps++ }
// Adaptive Heun method, can be used as solver.Step func (_ *Heun) Step() { y := M.Buffer() dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) if FixDt != 0 { Dt_si = FixDt } dt := float32(Dt_si * GammaLL) util.Assert(dt > 0) // stage 1 torqueFn(dy0) cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy // stage 2 dy := cuda.Buffer(3, y.Size()) defer cuda.Recycle(dy) Time += Dt_si torqueFn(dy) err := cuda.MaxVecDiff(dy0, dy) * float64(dt) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK cuda.Madd3(y, y, dy, dy0, 1, 0.5*dt, -0.5*dt) M.normalize() NSteps++ adaptDt(math.Pow(MaxErr/err, 1./2.)) setLastErr(err) setMaxTorque(dy) } else { // undo bad step util.Assert(FixDt == 0) Time -= Dt_si cuda.Madd2(y, y, dy0, 1, -dt) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./3.)) } }
func (e *excitation) AddTo(dst *data.Slice) { if !e.perRegion.isZero() { cuda.RegionAddV(dst, e.perRegion.gpuLUT(), regions.Gpu()) } for _, t := range e.extraTerms { var mul float32 = 1 if t.mul != nil { mul = float32(t.mul()) } cuda.Madd2(dst, dst, t.mask, 1, mul) } }
func (b *thermField) AddTo(dst *data.Slice) { if !Temp.isZero() { b.update() cuda.Madd2(dst, dst, b.noise, 1, 1) } }
func (rk *RK23) Step() { m := M.Buffer() size := m.Size() if FixDt != 0 { Dt_si = FixDt } // upon resize: remove wrongly sized k1 if rk.k1.Size() != m.Size() { rk.Free() } // first step ever: one-time k1 init and eval if rk.k1 == nil { rk.k1 = cuda.NewSlice(3, size) torqueFn(rk.k1) } // FSAL cannot be used with temperature if !Temp.isZero() { torqueFn(rk.k1) } t0 := Time // backup magnetization m0 := cuda.Buffer(3, size) defer cuda.Recycle(m0) data.Copy(m0, m) k2, k3, k4 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size) defer cuda.Recycle(k2) defer cuda.Recycle(k3) defer cuda.Recycle(k4) h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL // there is no explicit stage 1: k1 from previous step // stage 2 Time = t0 + (1./2.)*Dt_si cuda.Madd2(m, m, rk.k1, 1, (1./2.)*h) // m = m*1 + k1*h/2 M.normalize() torqueFn(k2) // stage 3 Time = t0 + (3./4.)*Dt_si cuda.Madd2(m, m0, k2, 1, (3./4.)*h) // m = m0*1 + k2*3/4 M.normalize() torqueFn(k3) // 3rd order solution madd4(m, m0, rk.k1, k2, k3, 1, (2./9.)*h, (1./3.)*h, (4./9.)*h) M.normalize() // error estimate Time = t0 + Dt_si torqueFn(k4) Err := k2 // re-use k2 as error // difference of 3rd and 2nd order torque without explicitly storing them first madd4(Err, rk.k1, k2, k3, k4, (7./24.)-(2./9.), (1./4.)-(1./3.), (1./3.)-(4./9.), (1. / 8.)) // determine error err := cuda.MaxVecNorm(Err) * float64(h) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK setLastErr(err) setMaxTorque(k4) NSteps++ Time = t0 + Dt_si adaptDt(math.Pow(MaxErr/err, 1./3.)) data.Copy(rk.k1, k4) // FSAL } else { // undo bad step //util.Println("Bad step at t=", t0, ", err=", err) util.Assert(FixDt == 0) Time = t0 data.Copy(m, m0) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./4.)) } }
// TODO: into cuda func madd4(dst, src1, src2, src3, src4 *data.Slice, w1, w2, w3, w4 float32) { cuda.Madd3(dst, src1, src2, src3, w1, w2, w3) cuda.Madd2(dst, dst, src4, 1, w4) }
func madd6(dst, src1, src2, src3, src4, src5, src6 *data.Slice, w1, w2, w3, w4, w5, w6 float32) { madd5(dst, src1, src2, src3, src4, src5, w1, w2, w3, w4, w5) cuda.Madd2(dst, dst, src6, 1, w6) }
func (rk *RK45DP) Step() { m := M.Buffer() size := m.Size() if FixDt != 0 { Dt_si = FixDt } // upon resize: remove wrongly sized k1 if rk.k1.Size() != m.Size() { rk.Free() } // first step ever: one-time k1 init and eval if rk.k1 == nil { rk.k1 = cuda.NewSlice(3, size) torqueFn(rk.k1) } // FSAL cannot be used with finite temperature if !Temp.isZero() { torqueFn(rk.k1) } t0 := Time // backup magnetization m0 := cuda.Buffer(3, size) defer cuda.Recycle(m0) data.Copy(m0, m) k2, k3, k4, k5, k6 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size) defer cuda.Recycle(k2) defer cuda.Recycle(k3) defer cuda.Recycle(k4) defer cuda.Recycle(k5) defer cuda.Recycle(k6) // k2 will be re-used as k7 h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL // there is no explicit stage 1: k1 from previous step // stage 2 Time = t0 + (1./5.)*Dt_si cuda.Madd2(m, m, rk.k1, 1, (1./5.)*h) // m = m*1 + k1*h/5 M.normalize() torqueFn(k2) // stage 3 Time = t0 + (3./10.)*Dt_si cuda.Madd3(m, m0, rk.k1, k2, 1, (3./40.)*h, (9./40.)*h) M.normalize() torqueFn(k3) // stage 4 Time = t0 + (4./5.)*Dt_si madd4(m, m0, rk.k1, k2, k3, 1, (44./45.)*h, (-56./15.)*h, (32./9.)*h) M.normalize() torqueFn(k4) // stage 5 Time = t0 + (8./9.)*Dt_si madd5(m, m0, rk.k1, k2, k3, k4, 1, (19372./6561.)*h, (-25360./2187.)*h, (64448./6561.)*h, (-212./729.)*h) M.normalize() torqueFn(k5) // stage 6 Time = t0 + (1.)*Dt_si madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (9017./3168.)*h, (-355./33.)*h, (46732./5247.)*h, (49./176.)*h, (-5103./18656.)*h) M.normalize() torqueFn(k6) // stage 7: 5th order solution Time = t0 + (1.)*Dt_si // no k2 madd6(m, m0, rk.k1, k3, k4, k5, k6, 1, (35./384.)*h, (500./1113.)*h, (125./192.)*h, (-2187./6784.)*h, (11./84.)*h) // 5th M.normalize() k7 := k2 // re-use k2 torqueFn(k7) // next torque if OK // error estimate Err := cuda.Buffer(3, size) //k3 // re-use k3 as error estimate defer cuda.Recycle(Err) madd6(Err, rk.k1, k3, k4, k5, k6, k7, (35./384.)-(5179./57600.), (500./1113.)-(7571./16695.), (125./192.)-(393./640.), (-2187./6784.)-(-92097./339200.), (11./84.)-(187./2100.), (0.)-(1./40.)) // determine error err := cuda.MaxVecNorm(Err) * float64(h) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK setLastErr(err) setMaxTorque(k7) NSteps++ Time = t0 + Dt_si adaptDt(math.Pow(MaxErr/err, 1./5.)) data.Copy(rk.k1, k7) // FSAL } else { // undo bad step //util.Println("Bad step at t=", t0, ", err=", err) util.Assert(FixDt == 0) Time = t0 data.Copy(m, m0) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./6.)) } }
func (rk *RK4) Step() { m := M.Buffer() size := m.Size() if FixDt != 0 { Dt_si = FixDt } t0 := Time // backup magnetization m0 := cuda.Buffer(3, size) defer cuda.Recycle(m0) data.Copy(m0, m) k1, k2, k3, k4 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size) defer cuda.Recycle(k1) defer cuda.Recycle(k2) defer cuda.Recycle(k3) defer cuda.Recycle(k4) h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL // stage 1 torqueFn(k1) // stage 2 Time = t0 + (1./2.)*Dt_si cuda.Madd2(m, m, k1, 1, (1./2.)*h) // m = m*1 + k1*h/2 M.normalize() torqueFn(k2) // stage 3 cuda.Madd2(m, m0, k2, 1, (1./2.)*h) // m = m0*1 + k2*1/2 M.normalize() torqueFn(k3) // stage 4 Time = t0 + Dt_si cuda.Madd2(m, m0, k3, 1, 1.*h) // m = m0*1 + k3*1 M.normalize() torqueFn(k4) err := cuda.MaxVecDiff(k1, k4) * float64(h) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK // 4th order solution madd5(m, m0, k1, k2, k3, k4, 1, (1./6.)*h, (1./3.)*h, (1./3.)*h, (1./6.)*h) M.normalize() NSteps++ adaptDt(math.Pow(MaxErr/err, 1./4.)) setLastErr(err) setMaxTorque(k4) } else { // undo bad step //util.Println("Bad step at t=", t0, ", err=", err) util.Assert(FixDt == 0) Time = t0 data.Copy(m, m0) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./5.)) } }