// AddtoRhs adds the essential bcs / constraints terms to the augmented fb vector func (o EssentialBcs) AddToRhs(fb []float64, sol *Solution) { // skip if there are no constraints if len(o.Bcs) == 0 { return } // add -At*λ to fb la.SpMatTrVecMulAdd(fb, -1, o.Am, sol.L) // fb += -1 * At * λ // assemble -rc = c - A*y into fb ny := len(sol.Y) for i, c := range o.Bcs { fb[ny+i] = c.Fcn.F(sol.T, nil) } la.SpMatVecMulAdd(fb[ny:], -1, o.Am, sol.Y) // fb += -1 * A * y }
func TestJacobian03(tst *testing.T) { //verbose() chk.PrintTitle("TestJacobian 03") // grid var g fdm.Grid2D //g.Init(1.0, 1.0, 4, 4) g.Init(1.0, 1.0, 6, 6) //g.Init(1.0, 1.0, 11, 11) // equations numbering var e fdm.Equations peq := utl.IntUnique(g.L, g.R, g.B, g.T) e.Init(g.N, peq) // K11 and K12 var K11, K12 la.Triplet fdm.InitK11andK12(&K11, &K12, &e) // assembly F1 := make([]float64, e.N1) fdm.Assemble(&K11, &K12, F1, nil, &g, &e) // prescribed values U2 := make([]float64, e.N2) for _, eq := range g.L { U2[e.FR2[eq]] = 50.0 } for _, eq := range g.R { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.B { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.T { U2[e.FR2[eq]] = 50.0 } // functions k11 := K11.ToMatrix(nil) k12 := K12.ToMatrix(nil) ffcn := func(fU1, U1 []float64) error { // K11*U1 + K12*U2 - F1 la.VecCopy(fU1, -1, F1) // fU1 := (-F1) la.SpMatVecMulAdd(fU1, 1, k11, U1) // fU1 += K11*U1 la.SpMatVecMulAdd(fU1, 1, k12, U2) // fU1 += K12*U2 return nil } Jfcn := func(dfU1dU1 *la.Triplet, U1 []float64) error { fdm.Assemble(dfU1dU1, &K12, F1, nil, &g, &e) return nil } U1 := make([]float64, e.N1) CompareJac(tst, ffcn, Jfcn, U1, 0.0075) print_jac := false if print_jac { W1 := make([]float64, e.N1) fU1 := make([]float64, e.N1) ffcn(fU1, U1) var Jnum la.Triplet Jnum.Init(e.N1, e.N1, e.N1*e.N1) Jacobian(&Jnum, ffcn, U1, fU1, W1) la.PrintMat("K11 ", K11.ToMatrix(nil).ToDense(), "%g ", false) la.PrintMat("Jnum", Jnum.ToMatrix(nil).ToDense(), "%g ", false) } test_ffcn := false if test_ffcn { Uc := []float64{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0, 25.0, 325.0 / 22.0, 100.0 / 11.0, 50.0 / 11.0, 0.0, 50.0, 775.0 / 22.0, 25.0, 375.0 / 22.0, 100.0 / 11.0, 0.0, 50.0, 450.0 / 11.0, 725.0 / 22.0, 25.0, 325.0 / 22.0, 0.0, 50.0, 500.0 / 11.0, 450.0 / 11.0, 775.0 / 22.0, 25.0, 0.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, } for i := 0; i < e.N1; i++ { U1[i] = Uc[e.RF1[i]] } fU1 := make([]float64, e.N1) min, max := la.VecMinMax(fU1) io.Pf("min/max fU1 = %v\n", min, max) } }
func Test_nls04(tst *testing.T) { //verbose() chk.PrintTitle("nls04. finite differences problem") // grid var g fdm.Grid2D g.Init(1.0, 1.0, 6, 6) // equations numbering var e fdm.Equations peq := utl.IntUnique(g.L, g.R, g.B, g.T) e.Init(g.N, peq) // K11 and K12 var K11, K12 la.Triplet fdm.InitK11andK12(&K11, &K12, &e) // assembly F1 := make([]float64, e.N1) fdm.Assemble(&K11, &K12, F1, nil, &g, &e) // prescribed values U2 := make([]float64, e.N2) for _, eq := range g.L { U2[e.FR2[eq]] = 50.0 } for _, eq := range g.R { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.B { U2[e.FR2[eq]] = 0.0 } for _, eq := range g.T { U2[e.FR2[eq]] = 50.0 } // functions k11 := K11.ToMatrix(nil) k12 := K12.ToMatrix(nil) ffcn := func(fU1, U1 []float64) error { // K11*U1 + K12*U2 - F1 la.VecCopy(fU1, -1, F1) // fU1 := (-F1) la.SpMatVecMulAdd(fU1, 1, k11, U1) // fU1 += K11*U1 la.SpMatVecMulAdd(fU1, 1, k12, U2) // fU1 += K12*U2 return nil } Jfcn := func(dfU1dU1 *la.Triplet, U1 []float64) error { fdm.Assemble(dfU1dU1, &K12, F1, nil, &g, &e) return nil } JfcnD := func(dfU1dU1 [][]float64, U1 []float64) error { la.MatCopy(dfU1dU1, 1, K11.ToMatrix(nil).ToDense()) return nil } prms := map[string]float64{ "atol": 1e-8, "rtol": 1e-8, "ftol": 1e-12, "lSearch": 0.0, } // init var nls_sps NlSolver // sparse analytical var nls_num NlSolver // sparse numerical var nls_den NlSolver // dense analytical nls_sps.Init(e.N1, ffcn, Jfcn, nil, false, false, prms) nls_num.Init(e.N1, ffcn, nil, nil, false, true, prms) nls_den.Init(e.N1, ffcn, nil, JfcnD, true, false, prms) defer nls_sps.Clean() defer nls_num.Clean() defer nls_den.Clean() // results U1sps := make([]float64, e.N1) U1num := make([]float64, e.N1) U1den := make([]float64, e.N1) Usps := make([]float64, e.N) Unum := make([]float64, e.N) Uden := make([]float64, e.N) // solution Uc := []float64{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 50.0, 25.0, 325.0 / 22.0, 100.0 / 11.0, 50.0 / 11.0, 0.0, 50.0, 775.0 / 22.0, 25.0, 375.0 / 22.0, 100.0 / 11.0, 0.0, 50.0, 450.0 / 11.0, 
725.0 / 22.0, 25.0, 325.0 / 22.0, 0.0, 50.0, 500.0 / 11.0, 450.0 / 11.0, 775.0 / 22.0, 25.0, 0.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, } io.PfYel("\n---- sparse -------- Analytical Jacobian -------------------\n") // solve err := nls_sps.Solve(U1sps, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Usps, U1sps, U2, &e) chk.Vector(tst, "Usps", 1e-14, Usps, Uc) // plot if false { g.Contour("results", "fig_t_heat_square", nil, Usps, 11, false) } io.PfYel("\n---- dense -------- Analytical Jacobian -------------------\n") // solve err = nls_den.Solve(U1den, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Uden, U1den, U2, &e) chk.Vector(tst, "Uden", 1e-14, Uden, Uc) io.PfYel("\n---- sparse -------- Numerical Jacobian -------------------\n") // solve err = nls_num.Solve(U1num, false) if err != nil { chk.Panic(err.Error()) } // check fdm.JoinVecs(Unum, U1num, U2, &e) chk.Vector(tst, "Unum", 1e-14, Unum, Uc) }
// radau5_step_mpi is the Radau5 step function. It performs one step of size
// o.h starting at (x0, y0), using the MPI routines (mpi.*) when o.Distr is set.
//
// The step is made of four stages:
//  1. unless reuse is requested, (re)compute the Jacobian, assemble the real
//     (γ*M - dfdy) and complex ((α+βi)*M - dfdy) matrices and factorise them
//  2. set the trial values of z and w (zero, or computed from o.ycol and the
//     ratio o.h/o.hprev)
//  3. run at most o.NmaxIt iterations, each one evaluating o.fcn three times
//     and solving one real and one complex linear system
//  4. compute the error estimate according to o.LerrStrat
//
// Input:
//   o    -- solver holding the stepsize, workspaces, linear solvers and counters
//   y0   -- values at the beginning of the step
//   x0   -- independent variable at the beginning of the step
//   args -- extra arguments forwarded to o.fcn and o.jac
//
// Output:
//   rerr -- error estimate; set to 2.0 when the iterations are flagged as diverging
//   err  -- error from o.fcn, o.jac, num.JacobianMpi, the solver initialisation
//           or the linear solutions performed inside the iteration loop
//
// The function panics (chk.Panic) if the iteration counter is equal to
// o.NmaxIt-1 after the loop.
func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) {

	// factors
	α := r5.α_ / o.h
	β := r5.β_ / o.h
	γ := r5.γ_ / o.h

	// Jacobian and decomposition
	// (the reuse flags are consumed here: each one is reset as soon as it is honoured)
	if o.reuseJdec {
		o.reuseJdec = false
	} else {

		// calculate only first Jacobian for all iterations (simple/modified Newton's method)
		if o.reuseJ {
			o.reuseJ = false
		} else if !o.jacIsOK {

			// Jacobian triplet
			if o.jac == nil { // numerical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . numerical Jacobian . . . < < < < < < < < <\n") }
				err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) {
					e = o.fcn(fy, o.h, x0, y, args...)
					return
				}, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable
			} else { // analytical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . analytical Jacobian . . . < < < < < < < < <\n") }
				err = o.jac(&o.dfdyT, o.h, x0, y0, args...)
			}
			if err != nil {
				return
			}

			// create M matrix
			// (only when no M matrix is flagged by o.hasM: M is then set to the identity;
			// with o.Distr, each process puts only the diagonal entries in [start, endp1))
			if o.doinit && !o.hasM {
				o.mTri = new(la.Triplet)
				if o.Distr {
					id, sz := mpi.Rank(), mpi.Size()
					start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz
					o.mTri.Init(o.ndim, o.ndim, endp1-start)
					for i := start; i < endp1; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				} else {
					o.mTri.Init(o.ndim, o.ndim, o.ndim)
					for i := 0; i < o.ndim; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				}
			}
			o.njeval += 1
			o.jacIsOK = true
		}

		// initialise triplets
		// (sized to hold all entries of M plus all entries of dfdy)
		if o.doinit {
			o.rctriR = new(la.Triplet)
			o.rctriC = new(la.TripletC)
			o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len())
			xzmono := o.Distr
			o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono)
		}

		// update triplets
		la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT)       // rctriR := γ*M - dfdy
		la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy

		// initialise solver
		if o.doinit {
			err = o.lsolR.InitR(o.rctriR, false, false, false)
			if err != nil {
				return
			}
			err = o.lsolC.InitC(o.rctriC, false, false, false)
			if err != nil {
				return
			}
		}

		// perform factorisation
		// NOTE(review): whatever Fact reports back is not inspected here -- confirm
		// whether a failed factorisation can go unnoticed
		o.lsolR.Fact()
		o.lsolC.Fact()
		o.ndecomp += 1
	}

	// updated u[i]
	o.u[0] = x0 + r5.c[0]*o.h
	o.u[1] = x0 + r5.c[1]*o.h
	o.u[2] = x0 + r5.c[2]*o.h

	// (trial/initial) updated z[i] and w[i]
	if o.first || o.ZeroTrial {
		for m := 0; m < o.ndim; m++ {
			o.z[0][m], o.w[0][m] = 0.0, 0.0
			o.z[1][m], o.w[1][m] = 0.0, 0.0
			o.z[2][m], o.w[2][m] = 0.0, 0.0
		}
	} else {
		// trial z computed from o.ycol with the stepsize ratio h/hprev; w := Ti * z
		c3q := o.h / o.hprev
		c1q := r5.μ1 * c3q
		c2q := r5.μ2 * c3q
		for m := 0; m < o.ndim; m++ {
			o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m]))
			o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m]))
			o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m]))
			o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m]
			o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m]
			o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m]
		}
	}

	// iterations
	o.nit = 0
	o.η = math.Pow(max(o.η, o.ϵ), 0.8)
	o.θ = o.θmax
	o.diverg = false
	var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64
	var it int
	for it = 0; it < o.NmaxIt; it++ {

		// max iterations ?
		// (o.nitmax records the largest number of iterations seen so far)
		o.nit = it + 1
		if o.nit > o.nitmax {
			o.nitmax = o.nit
		}

		// evaluate f(x,y) at (u[i],v[i]=y0+z[i])
		for i := 0; i < 3; i++ {
			for m := 0; m < o.ndim; m++ {
				o.v[i][m] = y0[m] + o.z[i][m]
			}
			o.nfeval += 1
			err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...)
			if err != nil {
				return
			}
		}

		// calc rhs
		// (v is overwritten here and holds the right-hand sides of the linear systems)
		if o.hasM {
			// using δw as workspace here
			la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0
			la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1
			la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2
			if o.Distr {
				mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here
			}
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m]
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m]
			}
		}

		// solve linear system
		// (when not distributed and o.Pll is set, the real and the complex systems are
		// solved in two goroutines; each goroutine writes its own error variable and
		// wg.Wait blocks until both have finished)
		o.nlinsol += 1
		var errR, errC error
		if !o.Distr && o.Pll {
			wg := new(sync.WaitGroup)
			wg.Add(2)
			go func() {
				errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
				wg.Done()
			}()
			go func() {
				errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
				wg.Done()
			}()
			wg.Wait()
		} else {
			errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
			errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
		}

		// check for errors from linear solution
		// (both messages are joined with a newline when the two solutions fail)
		if errR != nil || errC != nil {
			var errmsg string
			if errR != nil {
				errmsg += errR.Error()
			}
			if errC != nil {
				if errR != nil {
					errmsg += "\n"
				}
				errmsg += errC.Error()
			}
			err = errors.New(errmsg)
			return
		}

		// update w and z
		// (w += δw, then z := T * w)
		for m := 0; m < o.ndim; m++ {
			o.w[0][m] += o.δw[0][m]
			o.w[1][m] += o.δw[1][m]
			o.w[2][m] += o.δw[2][m]
			o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m]
			o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m]
			o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m]
		}

		// rms norm of δw
		// (each component is scaled by o.scal; the mean is taken over 3*ndim values)
		Lδw = 0.0
		for m := 0; m < o.ndim; m++ {
			Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0)
		}
		Lδw = math.Sqrt(Lδw / float64(3*o.ndim))

		// check convergence
		// (needs the norm of the previous iteration, hence it > 0; θ is the ratio of
		// two consecutive norms in the second iteration and the geometric mean of the
		// last two ratios afterwards)
		if it > 0 {
			thq = Lδw / oLδw
			if it == 1 {
				o.θ = thq
			} else {
				o.θ = math.Sqrt(thq * othq)
			}
			othq = thq
			if o.θ < 0.99 {
				o.η = o.θ / (1.0 - o.θ)
				iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ)
				itRerr = iterr / o.fnewt
				if itRerr >= 1.0 { // diverging
					qnewt = max(1.0e-4, min(20.0, itRerr))
					o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit)))
					o.diverg = true
					break
				}
			} else { // diverging badly (unexpected step-rejection)
				o.dvfac = 0.5
				o.diverg = true
				break
			}
		}

		// save old norm
		oLδw = Lδw

		// converged
		if o.η*Lδw < o.fnewt {
			break
		}
	}

	// did not converge
	// NOTE(review): when the loop runs out of iterations without a break, it is equal
	// to o.NmaxIt (not o.NmaxIt-1); as written, this panic is reached only when the
	// loop is left by a break during its last pass (converged or diverging) -- confirm
	// that this is the intended condition
	if it == o.NmaxIt-1 {
		chk.Panic("radau5_step failed with it=%d", it)
	}

	// diverging => stop
	if o.diverg {
		rerr = 2.0 // must leave state intact, any rerr is OK
		return
	}

	// error estimate
	if o.LerrStrat == 1 {

		// simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems)
		// (rerr is bounded from below by 1.0e-10)
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m]
			rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0)
		}
		rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10)

	} else {

		// common
		// (rhs := f0 + γ * M * ez, where M is omitted when o.hasM is false)
		if o.hasM {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m]
			}
			if o.Distr {
				la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
				mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
			} else {
				la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m] + γ*o.ez[m]
			}
		}

		// NOTE(review): the error returned by SolveR is discarded in the three calls
		// below, unlike in the iteration loop -- confirm that this is acceptable

		// HW-VII p123 Eq.(8.19)
		if o.LerrStrat == 2 {
			o.lsolR.SolveR(o.lerr, o.rhs, false)
			rerr = o.rms_norm(o.lerr)

			// HW-VII p123 Eq.(8.20)
		} else {
			o.lsolR.SolveR(o.lerr, o.rhs, false)
			rerr = o.rms_norm(o.lerr)

			// the estimate is recomputed with f evaluated at y0 + lerr only if it is
			// not smaller than 1.0 (the negated comparison also catches NaN) and this
			// is the first step or a step flagged by o.reject
			if !(rerr < 1.0) {
				if o.first || o.reject {
					for m := 0; m < o.ndim; m++ {
						o.v[0][m] = y0[m] + o.lerr[m] // y0perr
					}
					o.nfeval += 1
					err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr
					if err != nil {
						return
					}
					if o.hasM {
						la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr
						if o.Distr {
							la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
							mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
						} else {
							la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
						}
					} else {
						la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez
					}
					o.lsolR.SolveR(o.lerr, o.rhs, false)
					rerr = o.rms_norm(o.lerr)
				}
			}
		}
	}
	return
}