func (u *ΔUpdater) Update() { qin := u.in.Array() qout := u.out.Array() q0 := u.q0.Array() qinMul := u.in.Multiplier() qoutMul := u.out.Multiplier() q0Mul := u.q0.Multiplier() COMP := u.in.NComp() pre := make([]float64, COMP) for ii := 0; ii < COMP; ii++ { qoutMul[ii] = qinMul[ii] pre[ii] = -q0Mul[ii] / qinMul[ii] //0? } switch COMP { case 1: gpu.Madd(qout.Component(0), qin.Component(0), q0.Component(0), pre[0]) case 3: gpu.VecMadd(qout, qin, q0, pre) default: panic(InputErrF("Δ is not implemented for NComp: ", COMP)) } }
func (u *SumUpdater) addTerms() { // TODO: optimize for 0,1,2 or more parents sum := u.sum parents := u.parents if sum.IsSpaceDependent() { for i := range parents { parent := parents[i] weight := u.weight[i] parMul := make([]float64, sum.NComp()) for c := 0; c < sum.NComp(); c++ { parMul[c] = weight * parent.Multiplier()[c] / sum.Multiplier()[c] } switch sum.NComp() { case 1: gpu.Madd(sum.array.Component(0), sum.array.Component(0), parent.array.Component(0), parMul[0]) case 3: gpu.VecMadd(sum.Array(), sum.Array(), parent.Array(), parMul) default: panic(InputErr("sum is not implemented for NComp: " + fmt.Sprint(sum.NComp()))) } } } else { for p, parent := range parents { for c := range sum.multiplier { sum.multiplier[c] += parent.multiplier[c] * u.weight[p] } } } }
func (s *HeunSolver) Step() { e := GetEngine() equation := e.equation // First update all inputs dt := engine.dt.Scalar() for i := range equation { Assert(equation[i].kind == EQN_PDE1) equation[i].input[0].Update() } // Then step all outputs // and invalidate them. // stage 0 for i := range equation { y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier checkUniform(dyMul) s.buffer[i] = Pool.Get(y.NComp(), y.Size3D()) s.buffer[i].CopyFromDevice(dy.Array()) // save for later gpu.Madd(y.Array(), y.Array(), dy.Array(), dt*dyMul[0]) // initial euler step y.Invalidate() } // Advance time e.time.SetScalar(e.time.Scalar() + dt) // update inputs again for i := range equation { Assert(equation[i].kind == EQN_PDE1) equation[i].input[0].Update() } // stage 1 for i := range equation { y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier h := float64(dt * dyMul[0]) gpu.MAdd2Async(y.Array(), dy.Array(), 0.5*h, s.buffer[i], -0.5*h, y.Array().Stream) // corrected step y.Array().Sync() Pool.Recycle(&s.buffer[i]) y.Invalidate() } e.step.SetScalar(e.step.Scalar() + 1) // advance time step }
// sum += parent func (sum *Quant) Add(parent *Quant) { invalidated := false for c := 0; c < sum.NComp(); c++ { parComp := parent.array.Component(c) parMul := parent.multiplier[c] if parMul == 0 { continue } sumMul := sum.multiplier[c] sumComp := sum.array.Component(c) // does not alloc gpu.Madd(sumComp, sumComp, parComp, parMul/sumMul) // divide by sum's multiplier! invalidated = true } if invalidated { sum.Invalidate() } }
func (s *BDFAM12) Step() { e := GetEngine() t0 := e.time.Scalar() s.badSteps.SetScalar(0) s.iterations.SetScalar(0) equation := e.equation //~ make sure that errors history is wiped for t0 = 0 s! if t0 == 0.0 { for i := range equation { s.err_list[i].Init() s.steps_list[i].Init() } } //~ save everything in the begining e.UpdateEqRHS() for i := range equation { y := equation[i].LHS() dy := equation[i].RHS() s.y0buffer[i].CopyFromDevice(y.Array()) //~ save for later s.dy0buffer[i].CopyFromDevice(dy.Array()) //~ save for later } const maxTry = 10 //~ undo at most this many bad steps const headRoom = 0.8 const maxIterErr = 0.1 const maxIter = 5 const alpha_ref = 0.6 try := 0 restrict_step := false for { dt := engine.dt.Scalar() badStep := false badIterator := false er := make([]float64, len(equation)) alp := make([]float64, len(equation)) //~ Do zero-order approximation with explicit Euler for i := range equation { y := equation[i].LHS() dy := equation[i].RHS() dyMul := dy.multiplier t_step := dt * dyMul[0] gpu.Madd(y.Array(), s.y0buffer[i], s.dy0buffer[i], t_step) y.Invalidate() } s.iterations.SetScalar(s.iterations.Scalar() + 1) //~ Do predictor using implicit Euler e.time.SetScalar(t0 + dt) e.UpdateEqRHS() for i := range equation { s.dybuffer[i].CopyFromDevice(equation[i].RHS().Array()) } for i := range equation { er[i] = maxIterErr } iter := 0 err := 1.0 α := 0.0 for { for i := range equation { y := equation[i].LHS() dy := equation[i].RHS() COMP := dy.NComp() srCOMP := 1.0 / math.Sqrt(float64(COMP)) h := dt * dy.multiplier[0] gpu.Madd(s.ybuffer[i], s.y0buffer[i], dy.Array(), h) tErr := 0.0 for p := 0; p < COMP; p++ { diffy := float64(s.diff[i].MaxDiff(y.Array().Component(p), s.ybuffer[i].Component(p))) maxy := float64(s.diff[i].MaxAbs(s.ybuffer[i].Component(p))) tErr += math.Pow(diffy/(s.maxAbsErr[i].Scalar()+maxy*s.maxRelErr[i].Scalar()), 2.0) } tErr = srCOMP * math.Sqrt(tErr) α = tErr / er[i] alp[i] = α s.alpha[i] = α er[i] = tErr y.Array().CopyFromDevice(s.ybuffer[i]) y.Invalidate() } //~ Get the largest error sort.Float64s(er) sort.Float64s(alp) err = er[len(equation)-1] α = alp[len(equation)-1] iter = iter + 1 s.iterations.SetScalar(s.iterations.Scalar() + 1) //~ Check first if the target error is reached if err <= maxIterErr { break } //~ If not, then check for convergence if α >= 1.0 || iter > maxIter { badIterator = true break } e.UpdateEqRHS() } //~ If fixed-point iterator cannot converge, then panic if badIterator && try == (maxTry-1) { panic(Bug(fmt.Sprintf("The BDF Euler iterator cannot converge! Please increase the maximum number of iterations and re-run!"))) } else if badIterator { //~ if there is a bad step in iterator then do hard/soft for step correction for fast/slow convergence h_alpha := 0.5 * dt if α > alpha_ref { h_alpha = dt * math.Pow(alpha_ref/α, 0.5) } engine.dt.SetScalar(h_alpha) restrict_step = true continue } //~ Save function value of the comparator //~ and restore dy as estimated by explicit Euler for i := range equation { s.y1buffer[i].CopyFromDevice(equation[i].LHS().Array()) equation[i].RHS().Array().CopyFromDevice(s.dybuffer[i]) } //~ Apply embedded 2nd order implicit method (trapezoidal) for i := range equation { er[i] = maxIterErr } iter = 0 err = 1.0 badIterator = false for { for i := range equation { y := equation[i].LHS() dy := equation[i].RHS() COMP := dy.NComp() srCOMP := 1.0 / math.Sqrt(float64(COMP)) h := float64(dt * dy.multiplier[0]) gpu.AddMadd(s.ybuffer[i], s.y0buffer[i], dy.Array(), s.dy0buffer[i], 0.5*h) tErr := 0.0 for p := 0; p < COMP; p++ { diffy := float64(s.diff[i].MaxDiff(y.Array().Component(p), s.ybuffer[i].Component(p))) maxy := float64(s.diff[i].MaxAbs(s.ybuffer[i].Component(p))) tErr += math.Pow(diffy/(s.maxAbsErr[i].Scalar()+maxy*s.maxRelErr[i].Scalar()), 2.0) } tErr = srCOMP * math.Sqrt(tErr) alp[i] = tErr / er[i] er[i] = tErr y.Array().CopyFromDevice(s.ybuffer[i]) y.Invalidate() } //~ Get the largest error sort.Float64s(er) sort.Float64s(alp) err = er[len(equation)-1] α = alp[len(equation)-1] iter = iter + 1 s.iterations.SetScalar(s.iterations.Scalar() + 1) //~ Check first if the target error is reached if err <= maxIterErr { break } //~ If not, then check for convergence if α >= 1.0 || iter > maxIter { badIterator = true break } e.UpdateEqRHS() } if badIterator && try == (maxTry-1) { //~ If fixed-point iterator cannot converge, then panic panic(Bug(fmt.Sprintf("The BDF Trapezoidal iterator cannot converge! Please decrease the error the maximum number of iterations and re-run!"))) } else if badIterator { //~ if there is a bad step in iterator then do hard/soft for step correction for fast/slow convergence h_alpha := 0.5 * dt if α > alpha_ref { h_alpha = dt * math.Pow(alpha_ref/α, 0.5) } engine.dt.SetScalar(h_alpha) continue } for i := range equation { y := equation[i].LHS() COMP := y.NComp() srCOMP := 1.0 / math.Sqrt(float64(COMP)) tErr := 0.0 for p := 0; p < COMP; p++ { diffy := float64(s.diff[i].MaxDiff(y.Array().Component(p), s.y1buffer[i].Component(p))) maxy := float64(s.diff[i].MaxAbs(s.y1buffer[i].Component(p))) tErr += math.Pow(diffy/(s.maxAbsErr[i].Scalar()+maxy*s.maxRelErr[i].Scalar()), 2.0) } tErr = srCOMP * math.Sqrt(tErr) if tErr > 1.0 { s.badSteps.SetScalar(s.badSteps.Scalar() + 1) badStep = true } s.err[i].SetScalar(tErr) //~ Estimate step correction step_corr := math.Pow(headRoom/tErr, 0.5) h_r := dt * step_corr new_dt := h_r //~ if iterator reported convergence problems, then the step correction should be restricted according to the linear prediction of the sweet convergence spot. if restrict_step { h_alpha := dt * math.Pow(alpha_ref/s.alpha[i], 0.5) new_dt = math.Min(h_r, h_alpha) restrict_step = false } //~ User-defined limiter for the new step. Just for stability experiments. if new_dt < s.minDt.Scalar() { new_dt = s.minDt.Scalar() } if new_dt > s.maxDt.Scalar() { new_dt = s.maxDt.Scalar() } s.newDt[i] = new_dt //~ Keep the history of 'good' errors if !badStep { s.err_list[i].PushFront(tErr) s.steps_list[i].PushFront(dt) if s.err_list[i].Len() == 10 { s.err_list[i].Remove(s.err_list[i].Back()) s.steps_list[i].Remove(s.steps_list[i].Back()) } } } //~ Get the new timestep sort.Float64s(s.newDt) nDt := s.newDt[0] engine.dt.SetScalar(nDt) if !badStep || nDt == s.minDt.Scalar() { break } if try > maxTry { panic(Bug(fmt.Sprint("The solver cannot converge after ", maxTry, " badsteps"))) } try++ } //~ Advance step e.step.SetScalar(e.step.Scalar() + 1) // advance time step }
func (s *BDFEuler) Step() { e := GetEngine() equation := e.equation for i := range equation { equation[i].input[0].Update() } // get dt here to avoid updates later on. dt := engine.dt.Scalar() // Advance time and update all inputs e.time.SetScalar(e.time.Scalar() + dt) // Then step all outputs (without intermediate updates!) // and invalidate them. for i := range equation { err := 1.0e38 s.iterations.SetScalar(0) iter := 0 // Do forward Euler step // Zero order approximation y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier h := dt * dyMul[0] s.y0buffer[i].CopyFromDevice(y.Array()) // save for later gpu.Madd(y.Array(), s.y0buffer[i], dy.Array(), h) y.Invalidate() equation[i].input[0].Update() s.iterations.SetScalar(s.iterations.Scalar() + 1) // Do backward Euler step and solve it // Do higher order approximation until converges // Using fixed-point iterator maxIterErr := s.maxIterErr[i].Scalar() maxIter := int(s.maxIter[i].Scalar()) for err > maxIterErr { gpu.Madd(s.ybuffer[i], s.y0buffer[i], dy.Array(), h) err = float64(s.diff[i].MaxDiff(y.Array(), s.ybuffer[i])) sum := float64(s.diff[i].MaxSum(y.Array(), s.ybuffer[i])) if sum > 0.0 { err = err / sum } iter = iter + 1 s.iterations.SetScalar(s.iterations.Scalar() + 1) y.Array().CopyFromDevice(s.ybuffer[i]) y.Invalidate() equation[i].input[0].Update() if iter > maxIter { panic(Bug(fmt.Sprintf("The BDF iterator cannot converge for %s! Please decrease the time step and re-run!", y.Name()))) } } } // Advance step e.step.SetScalar(e.step.Scalar() + 1) // advance step }
// Take one time step func (s *RK12Solver) Step() { e := GetEngine() equation := e.equation // First update all inputs for i := range equation { Assert(equation[i].kind == EQN_PDE1) equation[i].input[0].Update() } // Then step all outputs // and invalidate them. // stage 0 t0 := e.time.Scalar() for i := range equation { y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier checkUniform(dyMul) s.dybuffer[i].CopyFromDevice(dy.Array()) // save for later s.y0buffer[i].CopyFromDevice(y.Array()) // save for later } const maxTry = 10 // undo at most this many bad steps const headRoom = 0.8 try := 0 for { // We need to update timestep if the step has failed dt := engine.dt.Scalar() // initial euler step for i := range equation { y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier if try > 0 { // restore previous initial conditions y.Array().CopyFromDevice(s.y0buffer[i]) dy.Array().CopyFromDevice(s.dybuffer[i]) } gpu.Madd(y.Array(), y.Array(), dy.Array(), dt*dyMul[0]) y.Invalidate() } // Advance time e.time.SetScalar(t0 + dt) // update inputs again for i := range equation { equation[i].input[0].Update() } // stage 1 badStep := false minFactor := 2.0 for i := range equation { y := equation[i].output[0] dy := equation[i].input[0] dyMul := dy.multiplier h := float64(dt * dyMul[0]) gpu.MAdd2Async(y.Array(), dy.Array(), 0.5*h, s.dybuffer[i], -0.5*h, y.Array().Stream) // corrected step y.Array().Sync() // error estimate stepDiff := s.diff[i].MaxDiff(dy.Array(), s.dybuffer[i]) * h err := float64(stepDiff) s.err[i].SetScalar(err) maxErr := s.maxErr[i].Scalar() if err > maxErr { s.badSteps.SetScalar(s.badSteps.Scalar() + 1) badStep = true } if (!badStep || try == maxTry-1) && err > s.peakErr[i].Scalar() { // peak error should be that of good step, unless last trial which will not be undone s.peakErr[i].SetScalar(err) } factor := 0.0 if !badStep { factor = math.Sqrt(maxErr / err) //maxErr / err } else { factor = math.Pow(maxErr/err, 1./3.) } factor *= headRoom // do not increase/cut too much // TODO: give user the control: if factor > 1.5 { factor = 1.5 } if factor < 0.1 { factor = 0.1 } if factor < minFactor { minFactor = factor } // take minimum time increase factor of all eqns. y.Invalidate() //if badStep{break} // do not waste time on other equations } // Set new time step but do not go beyond min/max bounds newDt := dt * minFactor if newDt < s.minDt.Scalar() { newDt = s.minDt.Scalar() } if newDt > s.maxDt.Scalar() { newDt = s.maxDt.Scalar() } e.dt.SetScalar(newDt) if !badStep || newDt == s.minDt.Scalar() { break } if try > maxTry { panic(Bug(fmt.Sprint("The solver cannot converge after ", maxTry, " badsteps"))) } try++ } // end try // advance time step e.step.SetScalar(e.step.Scalar() + 1) }