// erk_step performs one explicit Runge-Kutta step.
//  Input:
//   o    -- ODE solver holding ERK coefficients (o.erkdat) and workspaces
//   y    -- current solution vector
//   x    -- current independent variable
//   args -- extra arguments forwarded to the user function o.fcn
//  Output:
//   rerr -- scaled rms local error estimate (bounded below by 1e-10)
//   err  -- first error returned by o.fcn, if any
func erk_step(o *ODE, y []float64, x float64, args ...interface{}) (rerr float64, err error) {

	// stage values: u[i] = x + h*c[i] and v[i] = y + h * Σ_{j<i} a[i][j]*f[j]
	for i := 0; i < o.nstg; i++ {
		o.u[i] = x + o.h*o.erkdat.c[i]
		la.VecCopy(o.v[i], 1, y)
		for j := 0; j < i; j++ {
			la.VecAdd(o.v[i], o.h*o.erkdat.a[i][j], o.f[j])
		}
		if i == 0 && o.erkdat.usefp && !o.first {
			// FSAL ("first same as last"): reuse the last stage derivative
			// of the previous step instead of re-evaluating the function
			la.VecCopy(o.f[i], 1, o.f[o.nstg-1])
		} else {
			o.nfeval += 1
			err = o.fcn(o.f[i], o.u[i], o.v[i], args...)
			if err != nil {
				return
			}
		}
	}

	// accumulate solution update into w[0] and build the local error estimate.
	// NOTE(review): w[0] is only incremented here, so it must already hold the
	// current solution on entry (set by the caller) — confirm against Solve
	var lerrm float64 // m component of local error estimate
	for m := 0; m < o.ndim; m++ {
		lerrm = 0.0
		for i := 0; i < o.nstg; i++ {
			o.w[0][m] += o.erkdat.b[i] * o.f[i][m] * o.h
			lerrm += (o.erkdat.be[i] - o.erkdat.b[i]) * o.f[i][m] * o.h
		}
		rerr += math.Pow(lerrm/o.scal[m], 2.0)
	}

	// scaled rms norm of the embedded error, kept away from zero
	rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10)
	return
}
// Draw draws or save figure with plot
//  dirout -- directory to save figure
//  fname  -- file name; e.g. myplot.eps or myplot.png. Use "" to skip saving
//  show   -- shows figure
//  extra  -- is called just after Subplot command and before any plotting
//  Note: subplots will be split if using 'eps' files
func Draw(dirout, fname string, show bool, extra ExtraPlt) {
	var fnk string // filename key
	var ext string // extension
	var eps bool   // is eps figure
	if fname != "" {
		fnk = io.FnKey(fname)
		ext = io.FnExt(fname)
		eps = ext == ".eps"
	}

	// arrange subplots on an nr x nc grid.
	// NOTE(review): if BestSquare can return nr*nc > nplots, Splots[k] below
	// will index out of range on the surplus cells — confirm it's an exact fit
	nplots := len(Splots)
	nr, nc := utl.BestSquare(nplots)
	var k int // linear subplot counter
	for i := 0; i < nr; i++ {
		for j := 0; j < nc; j++ {
			if !eps {
				plt.Subplot(nr, nc, k+1)
			}
			if extra != nil {
				extra(i+1, j+1, nplots)
			}
			if Splots[k].Title != "" {
				plt.Title(Splots[k].Title, Splots[k].Topts)
			}
			data := Splots[k].Data
			for _, d := range data {
				// default curve label to the data alias
				if d.Style.L == "" {
					d.Style.L = d.Alias
				}
				// apply optional x/y scaling (zero scale means "no scaling")
				x, y := d.X, d.Y
				if math.Abs(Splots[k].Xscale) > 0 {
					x = make([]float64, len(d.X))
					la.VecCopy(x, Splots[k].Xscale, d.X)
				}
				if math.Abs(Splots[k].Yscale) > 0 {
					y = make([]float64, len(d.Y))
					la.VecCopy(y, Splots[k].Yscale, d.Y)
				}
				plt.Plot(x, y, d.Style.GetArgs("clip_on=0"))
			}
			plt.Gll(Splots[k].Xlbl, Splots[k].Ylbl, Splots[k].GllArgs)
			// eps: save one file per subplot and clear the figure
			if eps {
				savefig(dirout, fnk, ext, k)
				plt.Clf()
			}
			k += 1
		}
	}
	// non-eps: save the whole figure once
	if !eps && fname != "" {
		savefig(dirout, fnk, ext, -1)
	}
	if show {
		plt.Show()
	}
}
func TestJacobian03(tst *testing.T) { //verbose() chk.PrintTitle("TestJacobian 03") // grid var g fdm.Grid2D //g.Init(1.0, 1.0, 4, 4) g.Init(1.0, 1.0, 6, 6) //g.Init(1.0, 1.0, 11, 11) // equations numbering var e fdm.Equations peq := utl.IntUnique(g.L, g.R, g.B, g.T) e.Init(g.N, peq) // K11 and K12 var K11, K12 la.Triplet fdm.InitK11andK12(&K11, &K12, &e) // assembly F1 := make([]float64, e.N1) fdm.Assemble(&K11, &K12, F1, nil, &g, &e) // prescribed values U2 := make([]float64, e.N2) for _, eq := range g.L { U2[e.FR2[eq]] = 50.0 } for _, eq := range g.R { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.B { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.T { U2[e.FR2[eq]] = 50.0 } // functions k11 := K11.ToMatrix(nil) k12 := K12.ToMatrix(nil) ffcn := func(fU1, U1 []float64) error { // K11*U1 + K12*U2 - F1 la.VecCopy(fU1, -1, F1) // fU1 := (-F1) la.SpMatVecMulAdd(fU1, 1, k11, U1) // fU1 += K11*U1 la.SpMatVecMulAdd(fU1, 1, k12, U2) // fU1 += K12*U2 return nil } Jfcn := func(dfU1dU1 *la.Triplet, U1 []float64) error { fdm.Assemble(dfU1dU1, &K12, F1, nil, &g, &e) return nil } U1 := make([]float64, e.N1) CompareJac(tst, ffcn, Jfcn, U1, 0.0075) print_jac := false if print_jac { W1 := make([]float64, e.N1) fU1 := make([]float64, e.N1) ffcn(fU1, U1) var Jnum la.Triplet Jnum.Init(e.N1, e.N1, e.N1*e.N1) Jacobian(&Jnum, ffcn, U1, fU1, W1) la.PrintMat("K11 ", K11.ToMatrix(nil).ToDense(), "%g ", false) la.PrintMat("Jnum", Jnum.ToMatrix(nil).ToDense(), "%g ", false) } test_ffcn := false if test_ffcn { Uc := []float64{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0, 25.0, 325.0 / 22.0, 100.0 / 11.0, 50.0 / 11.0, 0.0, 50.0, 775.0 / 22.0, 25.0, 375.0 / 22.0, 100.0 / 11.0, 0.0, 50.0, 450.0 / 11.0, 725.0 / 22.0, 25.0, 325.0 / 22.0, 0.0, 50.0, 500.0 / 11.0, 450.0 / 11.0, 775.0 / 22.0, 25.0, 0.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, } for i := 0; i < e.N1; i++ { U1[i] = Uc[e.RF1[i]] } fU1 := make([]float64, e.N1) min, max := la.VecMinMax(fU1) io.Pf("min/max fU1 = %v\n", min, max) } }
// backward-Euler func bweuler_step(o *ODE, y []float64, x float64, args ...interface{}) (rerr float64, err error) { // new x x += o.h // previous y la.VecCopy(o.v[0], 1, y) // v := y_old // iterations var rmsnr float64 // rms norm of residual var it int for it = 0; it < o.NmaxIt; it++ { // max iterations ? o.nit = it + 1 if o.nit > o.nitmax { o.nitmax = o.nit } // calculate f @ update y o.nfeval += 1 err = o.fcn(o.f[0], x, y, args...) if err != nil { return } // calculate residual rmsnr = 0.0 for i := 0; i < o.ndim; i++ { o.w[0][i] = y[i] - o.v[0][i] - o.h*o.f[0][i] // w := residual if o.UseRmsNorm { rmsnr += math.Pow(o.w[0][i]/o.scal[i], 2.0) } else { rmsnr += o.w[0][i] * o.w[0][i] } } if o.UseRmsNorm { rmsnr = math.Sqrt(rmsnr / float64(o.ndim)) } else { rmsnr = math.Sqrt(rmsnr) } if o.Verbose { io.Pfgrey(" residual = %10.5e (tol = %10.5e)\n", rmsnr, o.fnewt) } // converged if rmsnr < o.fnewt { break } // Jacobian matrix if o.doinit || !o.CteTg { o.njeval += 1 // calculate Jacobian if o.jac == nil { // numerical err = num.Jacobian(&o.dfdyT, func(fy, yy []float64) (e error) { e = o.fcn(fy, x, yy, args...) return }, y, o.f[0], o.δw[0], o.Distr) // δw works here as workspace variable } else { // analytical err = o.jac(&o.dfdyT, x, y, args...) 
} if err != nil { return } // debug //if true { //io.Pfblue2("J = %v\n", o.dfdyT.ToMatrix(nil).ToDense()[0]) //} if o.doinit { o.rctriR = new(la.Triplet) o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len()) } // calculate drdy matrix la.SpTriAdd(o.rctriR, 1, o.mTri, -o.h, &o.dfdyT) // rctriR := I - h * dfdy //la.PrintMat("rcmat", o.rctriR.ToMatrix(nil).ToDense(), "%8.3f", false) // initialise linear solver if o.doinit { err = o.lsolR.InitR(o.rctriR, false, false, false) if err != nil { return } } // perform factorisation o.ndecomp += 1 o.lsolR.Fact() } // solve linear system o.nlinsol += 1 o.lsolR.SolveR(o.δw[0], o.w[0], false) // δw := inv(rcmat) * residual // update y for i := 0; i < o.ndim; i++ { y[i] -= o.δw[0][i] } } // did not converge if it == o.NmaxIt-1 { chk.Panic("bweuler_step failed with it = %d", it) } return 1e+20, err // must not be used with automatic substepping }
func erk_accept(o *ODE, y []float64) { la.VecCopy(y, 1, o.w[0]) // update y }
func Test_nls04(tst *testing.T) { //verbose() chk.PrintTitle("nls04. finite differences problem") // grid var g fdm.Grid2D g.Init(1.0, 1.0, 6, 6) // equations numbering var e fdm.Equations peq := utl.IntUnique(g.L, g.R, g.B, g.T) e.Init(g.N, peq) // K11 and K12 var K11, K12 la.Triplet fdm.InitK11andK12(&K11, &K12, &e) // assembly F1 := make([]float64, e.N1) fdm.Assemble(&K11, &K12, F1, nil, &g, &e) // prescribed values U2 := make([]float64, e.N2) for _, eq := range g.L { U2[e.FR2[eq]] = 50.0 } for _, eq := range g.R { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.B { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.T { U2[e.FR2[eq]] = 50.0 } // functions k11 := K11.ToMatrix(nil) k12 := K12.ToMatrix(nil) ffcn := func(fU1, U1 []float64) error { // K11*U1 + K12*U2 - F1 la.VecCopy(fU1, -1, F1) // fU1 := (-F1) la.SpMatVecMulAdd(fU1, 1, k11, U1) // fU1 += K11*U1 la.SpMatVecMulAdd(fU1, 1, k12, U2) // fU1 += K12*U2 return nil } Jfcn := func(dfU1dU1 *la.Triplet, U1 []float64) error { fdm.Assemble(dfU1dU1, &K12, F1, nil, &g, &e) return nil } JfcnD := func(dfU1dU1 [][]float64, U1 []float64) error { la.MatCopy(dfU1dU1, 1, K11.ToMatrix(nil).ToDense()) return nil } prms := map[string]float64{ "atol": 1e-8, "rtol": 1e-8, "ftol": 1e-12, "lSearch": 0.0, } // init var nls_sps NlSolver // sparse analytical var nls_num NlSolver // sparse numerical var nls_den NlSolver // dense analytical nls_sps.Init(e.N1, ffcn, Jfcn, nil, false, false, prms) nls_num.Init(e.N1, ffcn, nil, nil, false, true, prms) nls_den.Init(e.N1, ffcn, nil, JfcnD, true, false, prms) defer nls_sps.Clean() defer nls_num.Clean() defer nls_den.Clean() // results U1sps := make([]float64, e.N1) U1num := make([]float64, e.N1) U1den := make([]float64, e.N1) Usps := make([]float64, e.N) Unum := make([]float64, e.N) Uden := make([]float64, e.N) // solution Uc := []float64{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0, 25.0, 325.0 / 22.0, 100.0 / 11.0, 50.0 / 11.0, 0.0, 50.0, 775.0 / 22.0, 25.0, 375.0 / 22.0, 100.0 / 11.0, 0.0, 50.0, 450.0 / 11.0, 
725.0 / 22.0, 25.0, 325.0 / 22.0, 0.0, 50.0, 500.0 / 11.0, 450.0 / 11.0, 775.0 / 22.0, 25.0, 0.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, } io.PfYel("\n---- sparse -------- Analytical Jacobian -------------------\n") // solve err := nls_sps.Solve(U1sps, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Usps, U1sps, U2, &e) chk.Vector(tst, "Usps", 1e-14, Usps, Uc) // plot if false { g.Contour("results", "fig_t_heat_square", nil, Usps, 11, false) } io.PfYel("\n---- dense -------- Analytical Jacobian -------------------\n") // solve err = nls_den.Solve(U1den, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Uden, U1den, U2, &e) chk.Vector(tst, "Uden", 1e-14, Uden, Uc) io.PfYel("\n---- sparse -------- Numerical Jacobian -------------------\n") // solve err = nls_num.Solve(U1num, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Unum, U1num, U2, &e) chk.Vector(tst, "Unum", 1e-14, Unum, Uc) }
// radau5_step_mpi performs one step of the Radau IIA 5th-order implicit
// Runge-Kutta method (MPI-aware version), following Hairer & Wanner
// "Solving ODEs II" (HW-VII).
//  Input:
//   o    -- solver holding Radau5 constants (r5), workspaces and state
//   y0   -- solution at the beginning of the step
//   x0   -- independent variable at the beginning of the step
//   args -- extra arguments forwarded to o.fcn / o.jac
//  Output:
//   rerr -- scaled local error estimate (set to 2.0 on divergence)
//   err  -- error from function/Jacobian evaluations or linear solvers
func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) {

	// factors: real (γ) and complex (α±βi) coefficients divided by h
	α := r5.α_ / o.h
	β := r5.β_ / o.h
	γ := r5.γ_ / o.h

	// Jacobian and decomposition
	if o.reuseJdec {
		o.reuseJdec = false
	} else {

		// calculate only first Jacobian for all iterations (simple/modified Newton's method)
		if o.reuseJ {
			o.reuseJ = false
		} else if !o.jacIsOK {

			// Jacobian triplet
			if o.jac == nil { // numerical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . numerical Jacobian . . . < < < < < < < < <\n") }
				err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) {
					e = o.fcn(fy, o.h, x0, y, args...)
					return
				}, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable
			} else { // analytical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . analytical Jacobian . . . < < < < < < < < <\n") }
				err = o.jac(&o.dfdyT, o.h, x0, y0, args...)
			}
			if err != nil {
				return
			}

			// create M matrix (identity placed when the problem has no mass matrix)
			if o.doinit && !o.hasM {
				o.mTri = new(la.Triplet)
				if o.Distr {
					// each MPI rank owns a contiguous slice of the diagonal
					id, sz := mpi.Rank(), mpi.Size()
					start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz
					o.mTri.Init(o.ndim, o.ndim, endp1-start)
					for i := start; i < endp1; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				} else {
					o.mTri.Init(o.ndim, o.ndim, o.ndim)
					for i := 0; i < o.ndim; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				}
			}
			o.njeval += 1
			o.jacIsOK = true
		}

		// initialise triplets
		if o.doinit {
			o.rctriR = new(la.Triplet)
			o.rctriC = new(la.TripletC)
			o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len())
			xzmono := o.Distr
			o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono)
		}

		// update triplets
		la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT)       // rctriR := γ*M - dfdy
		la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy

		// initialise solver
		if o.doinit {
			err = o.lsolR.InitR(o.rctriR, false, false, false)
			if err != nil {
				return
			}
			err = o.lsolC.InitC(o.rctriC, false, false, false)
			if err != nil {
				return
			}
		}

		// perform factorisation
		o.lsolR.Fact()
		o.lsolC.Fact()
		o.ndecomp += 1
	}

	// updated u[i]: stage abscissae
	o.u[0] = x0 + r5.c[0]*o.h
	o.u[1] = x0 + r5.c[1]*o.h
	o.u[2] = x0 + r5.c[2]*o.h

	// (trial/initial) updated z[i] and w[i]
	if o.first || o.ZeroTrial {
		for m := 0; m < o.ndim; m++ {
			o.z[0][m], o.w[0][m] = 0.0, 0.0
			o.z[1][m], o.w[1][m] = 0.0, 0.0
			o.z[2][m], o.w[2][m] = 0.0, 0.0
		}
	} else {
		// extrapolate trial values from the collocation polynomial (ycol)
		// of the previous step
		c3q := o.h / o.hprev
		c1q := r5.μ1 * c3q
		c2q := r5.μ2 * c3q
		for m := 0; m < o.ndim; m++ {
			o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m]))
			o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m]))
			o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m]))
			o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m]
			o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m]
			o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m]
		}
	}

	// iterations (simplified Newton on the transformed variables w)
	o.nit = 0
	o.η = math.Pow(max(o.η, o.ϵ), 0.8)
	o.θ = o.θmax
	o.diverg = false
	var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64
	var it int
	for it = 0; it < o.NmaxIt; it++ {

		// max iterations ?
		o.nit = it + 1
		if o.nit > o.nitmax {
			o.nitmax = o.nit
		}

		// evaluate f(x,y) at (u[i],v[i]=y0+z[i])
		for i := 0; i < 3; i++ {
			for m := 0; m < o.ndim; m++ {
				o.v[i][m] = y0[m] + o.z[i][m]
			}
			o.nfeval += 1
			err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...)
			if err != nil {
				return
			}
		}

		// calc rhs
		if o.hasM {
			// using δw as workspace here
			la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0
			la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1
			la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2
			if o.Distr {
				mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here
			}
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m]
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m]
			}
		}

		// solve linear system: the real and complex systems may run
		// concurrently in shared-memory mode (Pll)
		o.nlinsol += 1
		var errR, errC error
		if !o.Distr && o.Pll {
			wg := new(sync.WaitGroup)
			wg.Add(2)
			go func() {
				errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
				wg.Done()
			}()
			go func() {
				errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
				wg.Done()
			}()
			wg.Wait()
		} else {
			errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
			errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
		}

		// check for errors from linear solution
		if errR != nil || errC != nil {
			var errmsg string
			if errR != nil {
				errmsg += errR.Error()
			}
			if errC != nil {
				if errR != nil {
					errmsg += "\n"
				}
				errmsg += errC.Error()
			}
			err = errors.New(errmsg)
			return
		}

		// update w and z (z = T * w)
		for m := 0; m < o.ndim; m++ {
			o.w[0][m] += o.δw[0][m]
			o.w[1][m] += o.δw[1][m]
			o.w[2][m] += o.δw[2][m]
			o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m]
			o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m]
			o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m]
		}

		// rms norm of δw
		Lδw = 0.0
		for m := 0; m < o.ndim; m++ {
			Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0)
		}
		Lδw = math.Sqrt(Lδw / float64(3*o.ndim))

		// check convergence: estimate the contraction rate θ and predict
		// whether the remaining iterations can reach the tolerance
		if it > 0 {
			thq = Lδw / oLδw
			if it == 1 {
				o.θ = thq
			} else {
				o.θ = math.Sqrt(thq * othq)
			}
			othq = thq
			if o.θ < 0.99 {
				o.η = o.θ / (1.0 - o.θ)
				iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ)
				itRerr = iterr / o.fnewt
				if itRerr >= 1.0 { // diverging
					qnewt = max(1.0e-4, min(20.0, itRerr))
					o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit)))
					o.diverg = true
					break
				}
			} else { // diverging badly (unexpected step-rejection)
				o.dvfac = 0.5
				o.diverg = true
				break
			}
		}

		// save old norm
		oLδw = Lδw

		// converged
		if o.η*Lδw < o.fnewt {
			break
		}
	}

	// did not converge
	// NOTE(review): this condition fires when a break happened exactly at the
	// last iteration (possibly converged) and never when the loop actually
	// ran out (it == o.NmaxIt then) — confirm the intended condition
	if it == o.NmaxIt-1 {
		chk.Panic("radau5_step failed with it=%d", it)
	}

	// diverging => stop
	if o.diverg {
		rerr = 2.0 // must leave state intact, any rerr is OK
		return
	}

	// error estimate
	if o.LerrStrat == 1 {

		// simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems)
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m]
			rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0)
		}
		rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10)

	} else {

		// common: build rhs = f0 + γ*(M*)ez for the filtered estimate
		if o.hasM {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m]
			}
			if o.Distr {
				la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
				mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
			} else {
				la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m] + γ*o.ez[m]
			}
		}

		if o.LerrStrat == 2 {
			// HW-VII p123 Eq.(8.19)
			o.lsolR.SolveR(o.lerr, o.rhs, false)
			rerr = o.rms_norm(o.lerr)
		} else {
			// HW-VII p123 Eq.(8.20): refine the estimate with one extra
			// function evaluation when the step looks like being rejected
			o.lsolR.SolveR(o.lerr, o.rhs, false)
			rerr = o.rms_norm(o.lerr)
			if !(rerr < 1.0) {
				if o.first || o.reject {
					for m := 0; m < o.ndim; m++ {
						o.v[0][m] = y0[m] + o.lerr[m] // y0perr
					}
					o.nfeval += 1
					err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr
					if err != nil {
						return
					}
					if o.hasM {
						la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr
						if o.Distr {
							la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
							mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
						} else {
							la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
						}
					} else {
						la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez
					}
					o.lsolR.SolveR(o.lerr, o.rhs, false)
					rerr = o.rms_norm(o.lerr)
				}
			}
		}
	}
	return
}
// Solve solves from (xa,ya) to (xb,yb) => find yb (stored in y)
//  y      -- initial solution on input; final solution on output
//  x, xb  -- initial and final values of the independent variable
//  Δx     -- output interval (and the fixed step size when fixstp is true)
//  fixstp -- use fixed steps of size Δx instead of adaptive substepping
//  args   -- extra arguments forwarded to the user functions
func (o *ODE) Solve(y []float64, x, xb, Δx float64, fixstp bool, args ...interface{}) (err error) {

	// check
	if xb < x {
		err = chk.Err(_ode_err3, xb, x)
		return
	}

	// derived variables: Newton iteration tolerance
	o.fnewt = max(10.0*o.ϵ/o.Rtol, min(0.03, math.Sqrt(o.Rtol)))

	// initial step size
	Δx = min(Δx, xb-x)
	if fixstp {
		o.h = Δx
	} else {
		o.h = min(Δx, o.IniH)
	}
	o.hprev = o.h

	// output initial state
	if o.out != nil {
		o.out(true, o.h, x, y, args...)
	}

	// stat variables
	o.nfeval = 0
	o.njeval = 0
	o.nsteps = 0
	o.naccepted = 0
	o.nrejected = 0
	o.ndecomp = 0
	o.nlinsol = 0
	o.nitmax = 0

	// control variables
	o.doinit = true
	o.first = true
	o.last = false
	o.reject = false
	o.diverg = false
	o.dvfac = 0
	o.η = 1.0
	o.jacIsOK = false
	o.reuseJdec = false
	o.reuseJ = false
	o.nit = 0
	o.hopt = o.h
	o.θ = o.θmax

	// local error indicator
	var rerr float64

	// linear solver: MUMPS for distributed (MPI) runs, UMFPACK otherwise
	lsname := "umfpack"
	if o.Distr {
		lsname = "mumps"
	}
	o.lsolR = la.GetSolver(lsname)
	o.lsolC = la.GetSolver(lsname)

	// clean up and show stat before leaving
	defer func() {
		o.lsolR.Clean()
		o.lsolC.Clean()
		if !o.silent {
			o.Stat()
		}
	}()

	// first scaling variable
	la.VecScaleAbs(o.scal, o.Atol, o.Rtol, y) // o.scal := o.Atol + o.Rtol * abs(y)

	// fixed steps
	// NOTE(review): errors returned by o.fcn and o.step are discarded in this
	// loop — confirm this is intentional
	if fixstp {
		la.VecCopy(o.w[0], 1, y) // copy initial values to worksapce
		if o.Verbose {
			io.Pfgreen("x = %v\n", x)
		}
		for x < xb {
			//if x + o.h > xb { o.h = xb - x }
			if o.jac == nil { // numerical Jacobian
				if o.method == "Radau5" {
					o.nfeval += 1
					o.fcn(o.f0, x, y, args...)
				}
			}
			o.reuseJdec = false
			o.reuseJ = false
			o.jacIsOK = false
			o.step(o, y, x, args...)
			o.nsteps += 1
			o.doinit = false
			o.first = false
			o.hprev = o.h
			x += o.h
			o.accept(o, y)
			if o.out != nil {
				o.out(false, o.h, x, y, args...)
			}
			if o.Verbose {
				io.Pfgreen("x = %v\n", x)
			}
		}
		return
	}

	// first function evaluation
	o.nfeval += 1
	o.fcn(o.f0, x, y, args...) // o.f0 := f(x,y)

	// time loop: adaptive substepping between output stations x+Δx
	var dxmax, xstep, fac, div, dxnew, facgus, old_h, old_rerr float64
	var dxratio float64
	var failed bool
	for x < xb {
		dxmax, xstep = Δx, x+Δx
		failed = false
		for iss := 0; iss < o.NmaxSS+1; iss++ { // total number of substeps

			o.nsteps += 1

			// error: did not converge
			if iss == o.NmaxSS {
				failed = true
				break
			}

			// converged?
			if x-xstep >= 0.0 {
				break
			}

			// step update
			rerr, err = o.step(o, y, x, args...)

			// initialise only once
			o.doinit = false

			// iterations diverging ? retry with a reduced step size
			if o.diverg {
				o.diverg = false
				o.reject = true
				o.last = false
				o.h = o.dvfac * o.h
				continue
			}

			// step size change: standard controller with safety factor fac
			fac = min(o.Mfac, o.Mfac*float64(1+2*o.NmaxIt)/float64(o.nit+2*o.NmaxIt))
			div = max(o.Mmin, min(o.Mmax, math.Pow(rerr, 0.25)/fac))
			dxnew = o.h / div

			// accepted
			if rerr < 1.0 {

				// set flags
				o.naccepted += 1
				o.first = false
				o.jacIsOK = false

				// update x and y
				o.hprev = o.h
				x += o.h
				o.accept(o, y)

				// output
				if o.out != nil {
					o.out(false, o.h, x, y, args...)
				}

				// converged ?
				if o.last {
					o.hopt = o.h // optimal h
					break
				}

				// predictive controller of Gustafsson
				if o.PredCtrl {
					if o.naccepted > 1 {
						facgus = (old_h / o.h) * math.Pow(math.Pow(rerr, 2.0)/old_rerr, 0.25) / o.Mfac
						facgus = max(o.Mmin, min(o.Mmax, facgus))
						div = max(div, facgus)
						dxnew = o.h / div
					}
					old_h = o.h
					old_rerr = max(1.0e-2, rerr)
				}

				// calc new scal and f0
				la.VecScaleAbs(o.scal, o.Atol, o.Rtol, y) // o.scal := o.Atol + o.Rtol * abs(y)
				o.nfeval += 1
				o.fcn(o.f0, x, y, args...) // o.f0 := f(x,y)

				// new step size
				dxnew = min(dxnew, dxmax)
				if o.reject { // do not alow o.h to grow if previous was a reject
					dxnew = min(o.h, dxnew)
				}
				o.reject = false

				// do not reuse current Jacobian and decomposition by default
				o.reuseJdec = false

				// last step ?
				if x+dxnew-xstep >= 0.0 {
					o.last = true
					o.h = xstep - x
				} else {
					// keep the current factorisation if the step-size ratio
					// stays within [C1h, C2h] and the Newton rate θ is small
					dxratio = dxnew / o.h
					o.reuseJdec = (o.θ <= o.θmax && dxratio >= o.C1h && dxratio <= o.C2h)
					if !o.reuseJdec {
						o.h = dxnew
					}
				}

				// check θ to decide if at least the Jacobian can be reused
				if !o.reuseJdec {
					o.reuseJ = (o.θ <= o.θmax)
				}

			} else { // rejected

				// set flags (first rejections before any acceptance not counted)
				if o.naccepted > 0 {
					o.nrejected += 1
				}
				o.reject = true
				o.last = false

				// new step size
				if o.first {
					o.h = 0.1 * o.h
				} else {
					o.h = dxnew
				}

				// last step
				if x+o.h > xstep {
					o.h = xstep - x
				}
			}
		}

		// sub-stepping failed
		if failed {
			err = chk.Err(_ode_err2, o.NmaxSS)
			break
		}
	}
	return
}
// Solve solves linear programming problem
//  min cᵀx  subject to  A x = b, x ≥ 0
// with a primal-dual interior-point iteration using an affine (predictor)
// solve followed by a corrected solve with centering term σμ.
//  verbose -- print an iteration/objective/error table
func (o *LinIpm) Solve(verbose bool) (err error) {

	// starting point: heuristic initialisation of (x, λ, s)
	AAt := la.MatAlloc(o.Nl, o.Nl) // A*Aᵀ
	d := make([]float64, o.Nl)     // inv(AAt) * b
	e := make([]float64, o.Nl)     // A * c
	la.SpMatMatTrMul(AAt, 1, o.A)          // AAt := A*Aᵀ
	la.SpMatVecMul(e, 1, o.A, o.C)         // e := A * c
	la.SPDsolve2(d, o.L, AAt, o.B, e)      // d := inv(AAt) * b and L := inv(AAt) * e
	la.SpMatTrVecMul(o.X, 1, o.A, d)       // x := Aᵀ * d
	la.VecCopy(o.S, 1, o.C)                // s := c
	la.SpMatTrVecMulAdd(o.S, -1, o.A, o.L) // s -= Aᵀλ

	// shift x and s so that both become strictly positive
	xmin := o.X[0]
	smin := o.S[0]
	for i := 1; i < o.Nx; i++ {
		xmin = min(xmin, o.X[i])
		smin = min(smin, o.S[i])
	}
	δx := max(-1.5*xmin, 0)
	δs := max(-1.5*smin, 0)
	var xdots, xsum, ssum float64
	for i := 0; i < o.Nx; i++ {
		o.X[i] += δx
		o.S[i] += δs
		xdots += o.X[i] * o.S[i]
		xsum += o.X[i]
		ssum += o.S[i]
	}
	δx = 0.5 * xdots / ssum
	δs = 0.5 * xdots / xsum
	for i := 0; i < o.Nx; i++ {
		o.X[i] += δx
		o.S[i] += δs
	}

	// constants for linear solver
	symmetric := false
	timing := false

	// auxiliary: offset of the s-block within the augmented Jacobian
	I := o.Nx + o.Nl

	// control variables
	var μ, σ float64     // μ and σ
	var xrmin float64    // min{ x_i / (-Δx_i) } (x_ratio_minimum)
	var srmin float64    // min{ s_i / (-Δs_i) } (s_ratio_minimum)
	var αpa float64      // α_prime_affine
	var αda float64      // α_dual_affine
	var μaff float64     // μ_affine
	var ctx, btl float64 // cᵀx and bᵀl

	// message
	if verbose {
		io.Pf("%3s%16s%16s\n", "it", "f(x)", "error")
	}

	// perform iterations
	it := 0
	for it = 0; it < o.NmaxIt; it++ {

		// compute residual
		la.SpMatTrVecMul(o.Rx, 1, o.A, o.L) // rx := Aᵀλ
		la.SpMatVecMul(o.Rl, 1, o.A, o.X)   // rλ := A x
		ctx, btl, μ = 0, 0, 0
		for i := 0; i < o.Nx; i++ {
			o.Rx[i] += o.S[i] - o.C[i]
			o.Rs[i] = o.X[i] * o.S[i]
			ctx += o.C[i] * o.X[i]
			μ += o.X[i] * o.S[i]
		}
		for i := 0; i < o.Nl; i++ {
			o.Rl[i] -= o.B[i]
			btl += o.B[i] * o.L[i]
		}
		μ /= float64(o.Nx)

		// check convergence: relative primal-dual objective gap
		lerr := math.Abs(ctx-btl) / (1.0 + math.Abs(ctx))
		if verbose {
			fx := la.VecDot(o.C, o.X)
			io.Pf("%3d%16.8e%16.8e\n", it, fx, lerr)
		}
		if lerr < o.Tol {
			break
		}

		// assemble Jacobian: [A and Aᵀ blocks; identity; S and X diagonals]
		o.J.Start()
		o.J.PutCCMatAndMatT(o.A)
		for i := 0; i < o.Nx; i++ {
			o.J.Put(i, I+i, 1.0)
			o.J.Put(I+i, i, o.S[i])
			o.J.Put(I+i, I+i, o.X[i])
		}

		// solve linear system (affine/predictor direction)
		if it == 0 {
			err = o.Lis.InitR(o.J, symmetric, false, timing)
			if err != nil {
				return
			}
		}
		err = o.Lis.Fact()
		if err != nil {
			return
		}
		err = o.Lis.SolveR(o.Mdy, o.R, false) // mdy := inv(J) * R
		if err != nil {
			return
		}

		// control variables: affine step lengths and centering parameter σ
		xrmin, srmin = o.calc_min_ratios()
		αpa = min(1, xrmin)
		αda = min(1, srmin)
		μaff = 0
		for i := 0; i < o.Nx; i++ {
			μaff += (o.X[i] - αpa*o.Mdx[i]) * (o.S[i] - αda*o.Mds[i])
		}
		μaff /= float64(o.Nx)
		σ = math.Pow(μaff/μ, 3)

		// update residual with corrector and centering terms
		for i := 0; i < o.Nx; i++ {
			o.Rs[i] += o.Mdx[i]*o.Mds[i] - σ*μ
		}

		// solve linear system again (corrected direction; same factorisation)
		err = o.Lis.SolveR(o.Mdy, o.R, false) // mdy := inv(J) * R
		if err != nil {
			return
		}

		// step lengths, damped by 0.99 to stay strictly interior
		xrmin, srmin = o.calc_min_ratios()
		αpa = min(1, 0.99*xrmin)
		αda = min(1, 0.99*srmin)

		// update
		for i := 0; i < o.Nx; i++ {
			o.X[i] -= αpa * o.Mdx[i]
			o.S[i] -= αda * o.Mds[i]
		}
		for i := 0; i < o.Nl; i++ {
			o.L[i] -= αda * o.Mdl[i]
		}
	}

	// check convergence: it == NmaxIt only when the loop never broke out
	if it == o.NmaxIt {
		err = chk.Err("iterations did not converge")
	}
	return
}