func SetMFM(dst *data.Slice) { buf := cuda.Buffer(3, Mesh().Size()) defer cuda.Recycle(buf) if mfmconv_ == nil { reinitmfmconv() } mfmconv_.Exec(buf, M.Buffer(), geometry.Gpu(), Bsat.gpuLUT1(), regions.Gpu()) cuda.Madd3(dst, buf.Comp(0), buf.Comp(1), buf.Comp(2), 1, 1, 1) }
func SetMFM(dst *data.Slice) { buf := cuda.Buffer(3, Mesh().Size()) defer cuda.Recycle(buf) if mfmconv_ == nil { reinitmfmconv() } msat := Msat.MSlice() defer msat.Recycle() mfmconv_.Exec(buf, M.Buffer(), geometry.Gpu(), msat) cuda.Madd3(dst, buf.Comp(0), buf.Comp(1), buf.Comp(2), 1, 1, 1) }
// Adaptive Heun method, can be used as solver.Step func (_ *Heun) Step() { y := M.Buffer() dy0 := cuda.Buffer(VECTOR, y.Size()) defer cuda.Recycle(dy0) if FixDt != 0 { Dt_si = FixDt } dt := float32(Dt_si * GammaLL) util.Assert(dt > 0) // stage 1 torqueFn(dy0) cuda.Madd2(y, y, dy0, 1, dt) // y = y + dt * dy // stage 2 dy := cuda.Buffer(3, y.Size()) defer cuda.Recycle(dy) Time += Dt_si torqueFn(dy) err := cuda.MaxVecDiff(dy0, dy) * float64(dt) // adjust next time step if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop // step OK cuda.Madd3(y, y, dy, dy0, 1, 0.5*dt, -0.5*dt) M.normalize() NSteps++ adaptDt(math.Pow(MaxErr/err, 1./2.)) setLastErr(err) setMaxTorque(dy) } else { // undo bad step util.Assert(FixDt == 0) Time -= Dt_si cuda.Madd2(y, y, dy0, 1, -dt) NUndone++ adaptDt(math.Pow(MaxErr/err, 1./3.)) } }
// TODO: into cuda func madd4(dst, src1, src2, src3, src4 *data.Slice, w1, w2, w3, w4 float32) { cuda.Madd3(dst, src1, src2, src3, w1, w2, w3) cuda.Madd2(dst, dst, src4, 1, w4) }
// Step performs one adaptive step of the RK45DP solver on the magnetization
// buffer.
//
// The torque k1 is kept in rk.k1 between calls: it is allocated and evaluated
// on the very first step, dropped (via rk.Free) when its size no longer
// matches the magnetization, and re-evaluated on every call when Temp is not
// zero. On an accepted step k1 is overwritten with the torque evaluated at
// the new state (k7), so that the next call does not have to recompute it.
//
// Stages 2..6 each set Time to an intermediate value, build a trial
// magnetization from the backup m0 and the torques computed so far, normalize
// it and evaluate the torque there. Stage 7 forms the new magnetization. The
// error estimate is the largest vector norm of a weighted combination of the
// torques, scaled by the internal time step.
//
// When FixDt is non-zero it overrides Dt_si and the step is always accepted.
// A rejected step restores Time and the magnetization from the backups. In
// both cases adaptDt is called with a correction factor derived from MaxErr
// and the error estimate.
func (rk *RK45DP) Step() {
	m := M.Buffer()
	size := m.Size()

	if FixDt != 0 {
		Dt_si = FixDt
	}

	// upon resize: remove wrongly sized k1
	// NOTE(review): this runs before the nil check below, so it relies on
	// rk.k1.Size() being callable while rk.k1 is still nil — confirm.
	if rk.k1.Size() != m.Size() {
		rk.Free()
	}

	// first step ever: one-time k1 init and eval
	if rk.k1 == nil {
		rk.k1 = cuda.NewSlice(3, size)
		torqueFn(rk.k1)
	}

	// FSAL (re-using the last torque of the previous step as k1) cannot be
	// used with finite temperature, so k1 is evaluated afresh.
	if !Temp.isZero() {
		torqueFn(rk.k1)
	}

	// backup time, used to set the stage times and to undo a bad step
	t0 := Time
	// backup magnetization, base point of every stage and restore point
	m0 := cuda.Buffer(3, size)
	defer cuda.Recycle(m0)
	data.Copy(m0, m)

	// scratch buffers for the stage torques, recycled on return
	k2, k3, k4, k5, k6 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
	defer cuda.Recycle(k2)
	defer cuda.Recycle(k3)
	defer cuda.Recycle(k4)
	defer cuda.Recycle(k5)
	defer cuda.Recycle(k6)
	// k2 will be re-used as k7

	h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

	// there is no explicit stage 1: k1 from previous step

	// stage 2
	Time = t0 + (1./5.)*Dt_si
	cuda.Madd2(m, m, rk.k1, 1, (1./5.)*h) // m = m*1 + k1*h/5
	M.normalize()
	torqueFn(k2)

	// stage 3 (from here on every stage starts from the backup m0)
	Time = t0 + (3./10.)*Dt_si
	cuda.Madd3(m, m0, rk.k1, k2, 1, (3./40.)*h, (9./40.)*h)
	M.normalize()
	torqueFn(k3)

	// stage 4
	Time = t0 + (4./5.)*Dt_si
	madd4(m, m0, rk.k1, k2, k3, 1, (44./45.)*h, (-56./15.)*h, (32./9.)*h)
	M.normalize()
	torqueFn(k4)

	// stage 5
	Time = t0 + (8./9.)*Dt_si
	madd5(m, m0, rk.k1, k2, k3, k4, 1, (19372./6561.)*h, (-25360./2187.)*h, (64448./6561.)*h, (-212./729.)*h)
	M.normalize()
	torqueFn(k5)

	// stage 6
	Time = t0 + (1.)*Dt_si
	madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (9017./3168.)*h, (-355./33.)*h, (46732./5247.)*h, (49./176.)*h, (-5103./18656.)*h)
	M.normalize()
	torqueFn(k6)

	// stage 7: 5th order solution
	Time = t0 + (1.)*Dt_si
	// no k2: it does not enter the 5th order combination
	madd6(m, m0, rk.k1, k3, k4, k5, k6, 1, (35./384.)*h, (500./1113.)*h, (125./192.)*h, (-2187./6784.)*h, (11./84.)*h) // 5th
	M.normalize()
	k7 := k2     // re-use k2: it is no longer needed once stage 6 is done
	torqueFn(k7) // next torque if OK

	// error estimate, stored in its own buffer (k3 is not re-used for this:
	// it is still an input of the combination below)
	Err := cuda.Buffer(3, size)
	defer cuda.Recycle(Err)
	// Each weight is the stage-7 coefficient minus a second coefficient for
	// the same torque (presumably the embedded lower-order solution of the
	// Dormand-Prince pair — TODO confirm); k7 only appears in the second set.
	madd6(Err, rk.k1, k3, k4, k5, k6, k7,
		(35./384.)-(5179./57600.),
		(500./1113.)-(7571./16695.),
		(125./192.)-(393./640.),
		(-2187./6784.)-(-92097./339200.),
		(11./84.)-(187./2100.),
		(0.)-(1./40.))

	// determine error
	err := cuda.MaxVecNorm(Err) * float64(h)

	// adjust next time step
	if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
		// step OK
		setLastErr(err)
		setMaxTorque(k7)
		NSteps++
		Time = t0 + Dt_si
		adaptDt(math.Pow(MaxErr/err, 1./5.))
		data.Copy(rk.k1, k7) // FSAL: k7 becomes k1 of the next step
	} else {
		// undo bad step
		//util.Println("Bad step at t=", t0, ", err=", err)
		util.Assert(FixDt == 0)
		Time = t0
		data.Copy(m, m0)
		NUndone++
		adaptDt(math.Pow(MaxErr/err, 1./6.))
	}
}
// TODO: into cuda func madd5(dst, src1, src2, src3, src4, src5 *data.Slice, w1, w2, w3, w4, w5 float32) { cuda.Madd3(dst, src1, src2, src3, w1, w2, w3) cuda.Madd3(dst, dst, src4, src5, 1, w4, w5) }