func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test ODE 02b") io.Pfcyan("Hairer-Wanner VII-p5 Eq.(1.5) Van der Pol's Equation (MPI)\n") } if mpi.Size() != 2 { chk.Panic(">> error: this test requires 2 MPI processors\n") return } eps := 1.0e-6 w := make([]float64, 2) // workspace fcn := func(f []float64, x float64, y []float64, args ...interface{}) error { f[0], f[1] = 0, 0 switch mpi.Rank() { case 0: f[0] = y[1] case 1: f[1] = ((1.0-y[0]*y[0])*y[1] - y[0]) / eps } // join all f mpi.AllReduceSum(f, w) return nil } jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error { if dfdy.Max() == 0 { dfdy.Init(2, 2, 4) } dfdy.Start() if false { // per column switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) case 1: dfdy.Put(0, 1, 1.0) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } else { // per row switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(0, 1, 1.0) case 1: dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } return nil } // data silent := false fixstp := false //method := "Dopri5" method := "Radau5" xa, xb := 0.0, 2.0 ya := []float64{2.0, -0.6} ndim := len(ya) // output var b bytes.Buffer out := func(first bool, dx, x float64, y []float64, args ...interface{}) error { if mpi.Rank() == 0 { if first { fmt.Fprintf(&b, "%23s %23s %23s %23s\n", "dx", "x", "y0", "y1") } fmt.Fprintf(&b, "%23.15E %23.15E %23.15E %23.15E\n", dx, x, y[0], y[1]) } return nil } defer func() { if mpi.Rank() == 0 { extra := "d2 = Read('data/vdpol_radau5_for.dat')\n" + "subplot(3,1,1)\n" + "plot(d2['x'],d2['y0'],'k+',label='res',ms=10)\n" + "subplot(3,1,2)\n" + "plot(d2['x'],d2['y1'],'k+',label='res',ms=10)\n" ode.Plot("/tmp/gosl", "vdpolB", method, &b, []int{0, 1}, ndim, nil, xa, xb, true, false, extra) } }() // one run var o ode.ODE o.Distr = true //numjac := true numjac := false if numjac { o.Init(method, ndim, fcn, nil, nil, out, silent) } else { o.Init(method, ndim, fcn, jac, nil, out, silent) } // tolerances and initial step size rtol := 1e-4 atol := rtol o.SetTol(atol, rtol) o.IniH = 1.0e-4 //o.NmaxSS = 2 y := make([]float64, ndim) copy(y, ya) t0 := time.Now() if fixstp { o.Solve(y, xa, xb, 0.05, fixstp) } else { o.Solve(y, xa, xb, xb-xa, fixstp) } if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) } }
// run_iterations solves the nonlinear problem func run_iterations(t, Δt float64, d *Domain, sum *Summary) (diverging, ok bool) { // zero accumulated increments la.VecFill(d.Sol.ΔY, 0) // calculate global starred vectors and interpolate starred variables from nodes to integration points if LogErr(d.star_vars(Δt), "cannot compute starred variables") { return } // auxiliary variables var it int var largFb, largFb0, Lδu float64 var prevFb, prevLδu float64 // message if Global.Sim.Data.ShowR { io.Pf("\n%13s%4s%23s%23s\n", "t", "it", "largFb", "Lδu") defer func() { io.Pf("%13.6e%4d%23.15e%23.15e\n", t, it, largFb, Lδu) }() } // iterations for it = 0; it < Global.Sim.Solver.NmaxIt; it++ { // assemble right-hand side vector (fb) with negative of residuals la.VecFill(d.Fb, 0) for _, e := range d.Elems { if !e.AddToRhs(d.Fb, d.Sol) { break } } if Stop() { return } // join all fb if Global.Distr { mpi.AllReduceSum(d.Fb, d.Wb) // this must be done here because there might be nodes sharing boundary conditions } // point natural boundary conditions; e.g. concentrated loads d.PtNatBcs.AddToRhs(d.Fb, t) // essential boundary conditioins; e.g. constraints d.EssenBcs.AddToRhs(d.Fb, d.Sol) // debug if Global.Debug { //la.PrintVec("fb", d.Fb[:d.Ny], "%13.10f ", false) //panic("stop") } // find largest absolute component of fb largFb = la.VecLargest(d.Fb, 1) // save residual if Global.Stat { sum.Resids.Append(it == 0, largFb) } // check largFb value if it == 0 { // store largest absolute component of fb largFb0 = largFb } else { // check convergence on Lf0 if largFb < Global.Sim.Solver.FbTol*largFb0 { // converged on fb break } // check convergence on fb_min if largFb < Global.Sim.Solver.FbMin { // converged with smallest value of fb break } } // check divergence on fb if it > 1 && Global.Sim.Solver.DvgCtrl { if largFb > prevFb { diverging = true break } } prevFb = largFb // assemble Jacobian matrix do_asm_fact := (it == 0 || !Global.Sim.Data.CteTg) if do_asm_fact { // assemble element matrices d.Kb.Start() for _, e := range d.Elems { if !e.AddToKb(d.Kb, d.Sol, it == 0) { break } } if Stop() { return } // debug if Global.DebugKb != nil { Global.DebugKb(d, it) } // join A and tr(A) matrices into Kb if Global.Root { d.Kb.PutMatAndMatT(&d.EssenBcs.A) } // initialise linear solver if d.InitLSol { if LogErr(d.LinSol.InitR(d.Kb, Global.Sim.LinSol.Symmetric, Global.Sim.LinSol.Verbose, Global.Sim.LinSol.Timing), "cannot initialise linear solver") { return } d.InitLSol = false } // perform factorisation LogErr(d.LinSol.Fact(), "factorisation") if Stop() { return } } // debug //KK := d.Kb.ToMatrix(nil).ToDense() //la.PrintMat("KK", KK, "%20.10f", false) //panic("stop") // solve for wb := δyb LogErr(d.LinSol.SolveR(d.Wb, d.Fb, false), "solve") if Stop() { return } // debug if Global.Debug { //la.PrintVec("wb", d.Wb[:d.Ny], "%13.10f ", false) } // update primary variables (y) for i := 0; i < d.Ny; i++ { d.Sol.Y[i] += d.Wb[i] // y += δy d.Sol.ΔY[i] += d.Wb[i] // ΔY += δy } if !Global.Sim.Data.Steady { for _, I := range d.T1eqs { d.Sol.Dydt[I] = Global.DynCoefs.β1*d.Sol.Y[I] - d.Sol.Psi[I] } for _, I := range d.T2eqs { d.Sol.Dydt[I] = Global.DynCoefs.α4*d.Sol.Y[I] - d.Sol.Chi[I] d.Sol.D2ydt2[I] = Global.DynCoefs.α1*d.Sol.Y[I] - d.Sol.Zet[I] } } // update Lagrange multipliers (λ) for i := 0; i < d.Nlam; i++ { d.Sol.L[i] += d.Wb[d.Ny+i] // λ += δλ } // backup / restore if it == 0 { // create backup copy of all secondary variables for _, e := range d.ElemIntvars { e.BackupIvs(false) } } else { // recover last converged state from backup copy for _, e := range d.ElemIntvars { e.RestoreIvs(false) } } // update secondary variables for _, e := range d.Elems { if !e.Update(d.Sol) { break } } if Stop() { return } // compute RMS norm of δu and check convegence on δu Lδu = la.VecRmsErr(d.Wb[:d.Ny], Global.Sim.Solver.Atol, Global.Sim.Solver.Rtol, d.Sol.Y[:d.Ny]) // message if Global.Sim.Data.ShowR { io.Pf("%13.6e%4d%23.15e%23.15e\n", t, it, largFb, Lδu) } // stop if converged on δu if Lδu < Global.Sim.Solver.Itol { break } // check divergence on Lδu if it > 1 && Global.Sim.Solver.DvgCtrl { if Lδu > prevLδu { diverging = true break } } prevLδu = Lδu } // check if iterations diverged if it == Global.Sim.Solver.NmaxIt { io.PfMag("max number of iterations reached: it = %d\n", it) return } // success ok = true return }
// run_iterations solves the nonlinear problem func run_iterations(t, Δt float64, d *Domain, dc *DynCoefs, sum *Summary, dbgKb DebugKb_t) (diverging bool, err error) { // zero accumulated increments la.VecFill(d.Sol.ΔY, 0) // calculate global starred vectors and interpolate starred variables from nodes to integration points if !d.Sim.Data.Steady { // compute starred vectors for _, I := range d.T1eqs { d.Sol.Psi[I] = dc.β1*d.Sol.Y[I] + dc.β2*d.Sol.Dydt[I] } for _, I := range d.T2eqs { d.Sol.Zet[I] = dc.α1*d.Sol.Y[I] + dc.α2*d.Sol.Dydt[I] + dc.α3*d.Sol.D2ydt2[I] d.Sol.Chi[I] = dc.α4*d.Sol.Y[I] + dc.α5*d.Sol.Dydt[I] + dc.α6*d.Sol.D2ydt2[I] } // set internal starred variables for _, e := range d.Elems { err = e.InterpStarVars(d.Sol) if err != nil { err = chk.Err("cannot compute starred variables:\n%v", err) return } } } // auxiliary variables var it int var largFb, largFb0, Lδu float64 var prevFb, prevLδu float64 dat := d.Sim.Solver // message if dat.ShowR { io.Pf("\n%13s%4s%23s%23s\n", "t", "it", "largFb", "Lδu") defer func() { io.Pf("%13.6e%4d%23.15e%23.15e\n", t, it, largFb, Lδu) }() } // iterations for it = 0; it < dat.NmaxIt; it++ { // assemble right-hand side vector (fb) with negative of residuals la.VecFill(d.Fb, 0) for _, e := range d.Elems { err = e.AddToRhs(d.Fb, d.Sol) if err != nil { return } } // join all fb if d.Distr { mpi.AllReduceSum(d.Fb, d.Wb) // this must be done here because there might be nodes sharing boundary conditions } // point natural boundary conditions; e.g. concentrated loads d.PtNatBcs.AddToRhs(d.Fb, t) // essential boundary conditioins; e.g. constraints d.EssenBcs.AddToRhs(d.Fb, d.Sol) // find largest absolute component of fb largFb = la.VecLargest(d.Fb, 1) // save residual if d.Sim.Data.Stat { if sum != nil { sum.Resids.Append(it == 0, largFb) } } // check largFb value if it == 0 { // store largest absolute component of fb largFb0 = largFb } else { // check convergence on Lf0 if largFb < dat.FbTol*largFb0 { // converged on fb break } // check convergence on fb_min if largFb < dat.FbMin { // converged with smallest value of fb break } } // check divergence on fb if it > 1 && dat.DvgCtrl { if largFb > prevFb { diverging = true break } } prevFb = largFb // assemble Jacobian matrix do_asm_fact := (it == 0 || !dat.CteTg) if do_asm_fact { // assemble element matrices d.Kb.Start() for _, e := range d.Elems { err = e.AddToKb(d.Kb, d.Sol, it == 0) if err != nil { return } } // debug if dbgKb != nil { dbgKb(d, it) } // join A and tr(A) matrices into Kb if d.Proc == 0 { d.Kb.PutMatAndMatT(&d.EssenBcs.A) } // initialise linear solver if d.InitLSol { err = d.LinSol.InitR(d.Kb, d.Sim.LinSol.Symmetric, d.Sim.LinSol.Verbose, d.Sim.LinSol.Timing) if err != nil { err = chk.Err("cannot initialise linear solver:\n%v", err) return } d.InitLSol = false } // perform factorisation err = d.LinSol.Fact() if err != nil { err = chk.Err("factorisation failed:\n%v", err) return } } // solve for wb := δyb err = d.LinSol.SolveR(d.Wb, d.Fb, false) if err != nil { err = chk.Err("solve failed:%v\n", err) return } // update primary variables (y) for i := 0; i < d.Ny; i++ { d.Sol.Y[i] += d.Wb[i] // y += δy d.Sol.ΔY[i] += d.Wb[i] // ΔY += δy } if !d.Sim.Data.Steady { for _, I := range d.T1eqs { d.Sol.Dydt[I] = dc.β1*d.Sol.Y[I] - d.Sol.Psi[I] } for _, I := range d.T2eqs { d.Sol.Dydt[I] = dc.α4*d.Sol.Y[I] - d.Sol.Chi[I] d.Sol.D2ydt2[I] = dc.α1*d.Sol.Y[I] - d.Sol.Zet[I] } } // update Lagrange multipliers (λ) for i := 0; i < d.Nlam; i++ { d.Sol.L[i] += d.Wb[d.Ny+i] // λ += δλ } // backup / restore if it == 0 { // create backup copy of all secondary variables for _, e := range d.ElemIntvars { e.BackupIvs(false) } } else { // recover last converged state from backup copy for _, e := range d.ElemIntvars { e.RestoreIvs(false) } } // update secondary variables for _, e := range d.Elems { err = e.Update(d.Sol) if err != nil { break } } // compute RMS norm of δu and check convegence on δu Lδu = la.VecRmsErr(d.Wb[:d.Ny], dat.Atol, dat.Rtol, d.Sol.Y[:d.Ny]) // message if dat.ShowR { io.Pf("%13.6e%4d%23.15e%23.15e\n", t, it, largFb, Lδu) } // stop if converged on δu if Lδu < dat.Itol { break } // check divergence on Lδu if it > 1 && dat.DvgCtrl { if Lδu > prevLδu { diverging = true break } } prevLδu = Lδu } // check if iterations diverged if it == dat.NmaxIt { err = chk.Err("max number of iterations reached: it = %d\n", it) } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test ODE 04b (MPI)") io.Pfcyan("Hairer-Wanner VII-p376 Transistor Amplifier (MPI)\n") io.Pfcyan("(from E Hairer's website, not the system in the book)\n") } if mpi.Size() != 3 { chk.Panic(">> error: this test requires 3 MPI processors\n") return } // RIGHT-HAND SIDE OF THE AMPLIFIER PROBLEM w := make([]float64, 8) // workspace fcn := func(f []float64, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) UET := d.UE * math.Sin(d.W*x) FAC1 := d.BETA * (math.Exp((y[3]-y[2])/d.UF) - 1.0) FAC2 := d.BETA * (math.Exp((y[6]-y[5])/d.UF) - 1.0) la.VecFill(f, 0) switch mpi.Rank() { case 0: f[0] = y[0] / d.R9 case 1: f[1] = (y[1]-d.UB)/d.R8 + d.ALPHA*FAC1 f[2] = y[2]/d.R7 - FAC1 case 2: f[3] = y[3]/d.R5 + (y[3]-d.UB)/d.R6 + (1.0-d.ALPHA)*FAC1 f[4] = (y[4]-d.UB)/d.R4 + d.ALPHA*FAC2 f[5] = y[5]/d.R3 - FAC2 f[6] = y[6]/d.R1 + (y[6]-d.UB)/d.R2 + (1.0-d.ALPHA)*FAC2 f[7] = (y[7] - UET) / d.R0 } mpi.AllReduceSum(f, w) return nil } // JACOBIAN OF THE AMPLIFIER PROBLEM jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) FAC14 := d.BETA * math.Exp((y[3]-y[2])/d.UF) / d.UF FAC27 := d.BETA * math.Exp((y[6]-y[5])/d.UF) / d.UF if dfdy.Max() == 0 { dfdy.Init(8, 8, 16) } NU := 2 dfdy.Start() switch mpi.Rank() { case 0: dfdy.Put(2+0-NU, 0, 1.0/d.R9) dfdy.Put(2+1-NU, 1, 1.0/d.R8) dfdy.Put(1+2-NU, 2, -d.ALPHA*FAC14) dfdy.Put(0+3-NU, 3, d.ALPHA*FAC14) dfdy.Put(2+2-NU, 2, 1.0/d.R7+FAC14) case 1: dfdy.Put(1+3-NU, 3, -FAC14) dfdy.Put(2+3-NU, 3, 1.0/d.R5+1.0/d.R6+(1.0-d.ALPHA)*FAC14) dfdy.Put(3+2-NU, 2, -(1.0-d.ALPHA)*FAC14) dfdy.Put(2+4-NU, 4, 1.0/d.R4) dfdy.Put(1+5-NU, 5, -d.ALPHA*FAC27) case 2: dfdy.Put(0+6-NU, 6, d.ALPHA*FAC27) dfdy.Put(2+5-NU, 5, 1.0/d.R3+FAC27) dfdy.Put(1+6-NU, 6, -FAC27) dfdy.Put(2+6-NU, 6, 1.0/d.R1+1.0/d.R2+(1.0-d.ALPHA)*FAC27) dfdy.Put(3+5-NU, 5, -(1.0-d.ALPHA)*FAC27) dfdy.Put(2+7-NU, 7, 1.0/d.R0) } return nil } // MATRIX "M" c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6 var M la.Triplet M.Init(8, 8, 14) M.Start() NU := 1 switch mpi.Rank() { case 0: M.Put(1+0-NU, 0, -c5) M.Put(0+1-NU, 1, c5) M.Put(2+0-NU, 0, c5) M.Put(1+1-NU, 1, -c5) M.Put(1+2-NU, 2, -c4) M.Put(1+3-NU, 3, -c3) case 1: M.Put(0+4-NU, 4, c3) M.Put(2+3-NU, 3, c3) M.Put(1+4-NU, 4, -c3) case 2: M.Put(1+5-NU, 5, -c2) M.Put(1+6-NU, 6, -c1) M.Put(0+7-NU, 7, c1) M.Put(2+6-NU, 6, c1) M.Put(1+7-NU, 7, -c1) } // WRITE FILE FUNCTION idxstp := 1 var b bytes.Buffer out := func(first bool, dx, x float64, y []float64, args ...interface{}) error { if mpi.Rank() == 0 { if first { fmt.Fprintf(&b, "%6s%23s%23s%23s%23s%23s%23s%23s%23s%23s\n", "ns", "x", "y0", "y1", "y2", "y3", "y4", "y5", "y6", "y7") } fmt.Fprintf(&b, "%6d%23.15E", idxstp, x) for j := 0; j < len(y); j++ { fmt.Fprintf(&b, "%23.15E", y[j]) } fmt.Fprintf(&b, "\n") idxstp += 1 } return nil } defer func() { if mpi.Rank() == 0 { io.WriteFileD("/tmp/gosl", "hwamplifierB.res", &b) } }() // INITIAL DATA D, xa, xb, ya := HWtransIni() // SET ODE SOLVER silent := false fixstp := false //method := "Dopri5" method := "Radau5" ndim := len(ya) //numjac := true numjac := false var osol ode.ODE osol.Pll = true if numjac { osol.Init(method, ndim, fcn, nil, &M, out, silent) } else { osol.Init(method, ndim, fcn, jac, &M, out, silent) } osol.IniH = 1.0e-6 // initial step size // SET TOLERANCES atol, rtol := 1e-11, 1e-5 osol.SetTol(atol, rtol) // RUN t0 := time.Now() if fixstp { osol.Solve(ya, xa, xb, 0.01, fixstp, &D) } else { osol.Solve(ya, xa, xb, xb-xa, fixstp, &D) } if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) } }
// Radau5 step function func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) { // factors α := r5.α_ / o.h β := r5.β_ / o.h γ := r5.γ_ / o.h // Jacobian and decomposition if o.reuseJdec { o.reuseJdec = false } else { // calculate only first Jacobian for all iterations (simple/modified Newton's method) if o.reuseJ { o.reuseJ = false } else if !o.jacIsOK { // Jacobian triplet if o.jac == nil { // numerical //if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . numerical Jacobian . . . < < < < < < < < <\n") } err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) { e = o.fcn(fy, o.h, x0, y, args...) return }, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable } else { // analytical //if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . analytical Jacobian . . . < < < < < < < < <\n") } err = o.jac(&o.dfdyT, o.h, x0, y0, args...) } if err != nil { return } // create M matrix if o.doinit && !o.hasM { o.mTri = new(la.Triplet) if o.Distr { id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz o.mTri.Init(o.ndim, o.ndim, endp1-start) for i := start; i < endp1; i++ { o.mTri.Put(i, i, 1.0) } } else { o.mTri.Init(o.ndim, o.ndim, o.ndim) for i := 0; i < o.ndim; i++ { o.mTri.Put(i, i, 1.0) } } } o.njeval += 1 o.jacIsOK = true } // initialise triplets if o.doinit { o.rctriR = new(la.Triplet) o.rctriC = new(la.TripletC) o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len()) xzmono := o.Distr o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono) } // update triplets la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT) // rctriR := γ*M - dfdy la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy // initialise solver if o.doinit { err = o.lsolR.InitR(o.rctriR, false, false, false) if err != nil { return } err = o.lsolC.InitC(o.rctriC, false, false, false) if err != nil { return } } // perform factorisation o.lsolR.Fact() o.lsolC.Fact() o.ndecomp += 1 } // updated u[i] o.u[0] = x0 + r5.c[0]*o.h o.u[1] = x0 + r5.c[1]*o.h o.u[2] = x0 + r5.c[2]*o.h // (trial/initial) updated z[i] and w[i] if o.first || o.ZeroTrial { for m := 0; m < o.ndim; m++ { o.z[0][m], o.w[0][m] = 0.0, 0.0 o.z[1][m], o.w[1][m] = 0.0, 0.0 o.z[2][m], o.w[2][m] = 0.0, 0.0 } } else { c3q := o.h / o.hprev c1q := r5.μ1 * c3q c2q := r5.μ2 * c3q for m := 0; m < o.ndim; m++ { o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m])) o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m])) o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m])) o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m] o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m] o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m] } } // iterations o.nit = 0 o.η = math.Pow(max(o.η, o.ϵ), 0.8) o.θ = o.θmax o.diverg = false var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64 var it int for it = 0; it < o.NmaxIt; it++ { // max iterations ? o.nit = it + 1 if o.nit > o.nitmax { o.nitmax = o.nit } // evaluate f(x,y) at (u[i],v[i]=y0+z[i]) for i := 0; i < 3; i++ { for m := 0; m < o.ndim; m++ { o.v[i][m] = y0[m] + o.z[i][m] } o.nfeval += 1 err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...) if err != nil { return } } // calc rhs if o.hasM { // using δw as workspace here la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0 la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1 la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2 if o.Distr { mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here } for m := 0; m < o.ndim; m++ { o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m] o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m] o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m] } } else { for m := 0; m < o.ndim; m++ { o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m] o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m] o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m] } } // solve linear system o.nlinsol += 1 var errR, errC error if !o.Distr && o.Pll { wg := new(sync.WaitGroup) wg.Add(2) go func() { errR = o.lsolR.SolveR(o.δw[0], o.v[0], false) wg.Done() }() go func() { errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false) wg.Done() }() wg.Wait() } else { errR = o.lsolR.SolveR(o.δw[0], o.v[0], false) errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false) } // check for errors from linear solution if errR != nil || errC != nil { var errmsg string if errR != nil { errmsg += errR.Error() } if errC != nil { if errR != nil { errmsg += "\n" } errmsg += errC.Error() } err = errors.New(errmsg) return } // update w and z for m := 0; m < o.ndim; m++ { o.w[0][m] += o.δw[0][m] o.w[1][m] += o.δw[1][m] o.w[2][m] += o.δw[2][m] o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m] o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m] o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m] } // rms norm of δw Lδw = 0.0 for m := 0; m < o.ndim; m++ { Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0) } Lδw = math.Sqrt(Lδw / float64(3*o.ndim)) // check convergence if it > 0 { thq = Lδw / oLδw if it == 1 { o.θ = thq } else { o.θ = math.Sqrt(thq * othq) } othq = thq if o.θ < 0.99 { o.η = o.θ / (1.0 - o.θ) iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ) itRerr = iterr / o.fnewt if itRerr >= 1.0 { // diverging qnewt = max(1.0e-4, min(20.0, itRerr)) o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit))) o.diverg = true break } } else { // diverging badly (unexpected step-rejection) o.dvfac = 0.5 o.diverg = true break } } // save old norm oLδw = Lδw // converged if o.η*Lδw < o.fnewt { break } } // did not converge if it == o.NmaxIt-1 { chk.Panic("radau5_step failed with it=%d", it) } // diverging => stop if o.diverg { rerr = 2.0 // must leave state intact, any rerr is OK return } // error estimate if o.LerrStrat == 1 { // simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems) for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m] rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0) } rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10) } else { // common if o.hasM { for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.rhs[m] = o.f0[m] } if o.Distr { la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez) // δw[0] = γ * M * ez (δw[0] is workspace) mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace) } else { la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez } } else { for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.rhs[m] = o.f0[m] + γ*o.ez[m] } } // HW-VII p123 Eq.(8.19) if o.LerrStrat == 2 { o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) // HW-VII p123 Eq.(8.20) } else { o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) if !(rerr < 1.0) { if o.first || o.reject { for m := 0; m < o.ndim; m++ { o.v[0][m] = y0[m] + o.lerr[m] // y0perr } o.nfeval += 1 err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr if err != nil { return } if o.hasM { la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr if o.Distr { la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez) // δw[0] = γ * M * ez (δw[0] is workspace) mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace) } else { la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez } } else { la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez } o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) } } } } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { io.PfYel("\nTest MPI 01\n") } if mpi.Size() != 3 { chk.Panic("this test needs 3 processors") } n := 11 x := make([]float64, n) id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*n)/sz, ((id+1)*n)/sz for i := start; i < endp1; i++ { x[i] = float64(i) } // Barrier mpi.Barrier() io.Pfgrey("x @ proc # %d = %v\n", id, x) // SumToRoot r := make([]float64, n) mpi.SumToRoot(r, x) var tst testing.T if id == 0 { chk.Vector(&tst, fmt.Sprintf("SumToRoot: r @ proc # %d", id), 1e-17, r, []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) } else { chk.Vector(&tst, fmt.Sprintf("SumToRoot: r @ proc # %d", id), 1e-17, r, make([]float64, n)) } // BcastFromRoot r[0] = 666 mpi.BcastFromRoot(r) chk.Vector(&tst, fmt.Sprintf("BcastFromRoot: r @ proc # %d", id), 1e-17, r, []float64{666, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) // AllReduceSum setslice(x) w := make([]float64, n) mpi.AllReduceSum(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceSum: w @ proc # %d", id), 1e-17, w, []float64{110, 110, 110, 1021, 1021, 1021, 2032, 2032, 2032, 3043, 3043}) // AllReduceSumAdd setslice(x) y := []float64{-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000} mpi.AllReduceSumAdd(y, x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceSumAdd: y @ proc # %d", id), 1e-17, y, []float64{-890, -890, -890, 21, 21, 21, 1032, 1032, 1032, 2043, 2043}) // AllReduceMin setslice(x) mpi.AllReduceMin(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceMin: x @ proc # %d", id), 1e-17, x, []float64{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}) // AllReduceMax setslice(x) mpi.AllReduceMax(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceMax: x @ proc # %d", id), 1e-17, x, []float64{100, 100, 100, 1000, 1000, 1000, 2000, 2000, 2000, 3000, 3000}) }
// solve_linear_problem solves the linear problem func solve_linear_problem(t float64, d *Domain, dc *DynCoefs, sum *Summary, first bool) (err error) { // assemble right-hand side vector (fb) with **negative** of residuals la.VecFill(d.Fb, 0) for _, e := range d.Elems { err = e.AddToRhs(d.Fb, d.Sol) if err != nil { return } } // join all fb if d.Distr { mpi.AllReduceSum(d.Fb, d.Wb) // this must be done here because there might be nodes sharing boundary conditions } // point natural boundary conditions; e.g. concentrated loads d.PtNatBcs.AddToRhs(d.Fb, t) // essential boundary conditioins; e.g. constraints d.EssenBcs.AddToRhs(d.Fb, d.Sol) // assemble and factorise Jacobian matrix just once if first { // assemble element matrices d.Kb.Start() for _, e := range d.Elems { err = e.AddToKb(d.Kb, d.Sol, true) if err != nil { return } } // join A and tr(A) matrices into Kb if d.Proc == 0 { d.Kb.PutMatAndMatT(&d.EssenBcs.A) } // initialise linear solver (just once) if d.InitLSol { err = d.LinSol.InitR(d.Kb, d.Sim.LinSol.Symmetric, d.Sim.LinSol.Verbose, d.Sim.LinSol.Timing) if err != nil { err = chk.Err("cannot initialise linear solver:\n%v", err) return } d.InitLSol = false } // perform factorisation (always if not CteTg) err = d.LinSol.Fact() if err != nil { err = chk.Err("factorisation failed:\n%v", err) return } } // solve for wb err = d.LinSol.SolveR(d.Wb, d.Fb, false) if err != nil { err = chk.Err("solve failed:%v\n", err) return } // update primary variables (y) for i := 0; i < d.Ny; i++ { d.Sol.Y[i] += d.Wb[i] // y += δy d.Sol.ΔY[i] = d.Wb[i] // ΔY = δy } // update Lagrange multipliers (λ) for i := 0; i < d.Nlam; i++ { d.Sol.L[i] += d.Wb[d.Ny+i] // λ += δλ } // update secondary variables for _, e := range d.Elems { err = e.Update(d.Sol) if err != nil { break } } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("ode04: Hairer-Wanner VII-p376 Transistor Amplifier\n") } if mpi.Size() != 3 { chk.Panic(">> error: this test requires 3 MPI processors\n") return } // data UE, UB, UF, ALPHA, BETA := 0.1, 6.0, 0.026, 0.99, 1.0e-6 R0, R1, R2, R3, R4, R5 := 1000.0, 9000.0, 9000.0, 9000.0, 9000.0, 9000.0 R6, R7, R8, R9 := 9000.0, 9000.0, 9000.0, 9000.0 W := 2.0 * 3.141592654 * 100.0 // initial values xa := 0.0 ya := []float64{0.0, UB, UB / (R6/R5 + 1.0), UB / (R6/R5 + 1.0), UB, UB / (R2/R1 + 1.0), UB / (R2/R1 + 1.0), 0.0} // endpoint of integration xb := 0.05 //xb = 0.0123 // OK //xb = 0.01235 // !OK // right-hand side of the amplifier problem w := make([]float64, 8) // workspace fcn := func(f []float64, dx, x float64, y []float64, args ...interface{}) error { UET := UE * math.Sin(W*x) FAC1 := BETA * (math.Exp((y[3]-y[2])/UF) - 1.0) FAC2 := BETA * (math.Exp((y[6]-y[5])/UF) - 1.0) la.VecFill(f, 0) switch mpi.Rank() { case 0: f[0] = y[0] / R9 case 1: f[1] = (y[1]-UB)/R8 + ALPHA*FAC1 f[2] = y[2]/R7 - FAC1 case 2: f[3] = y[3]/R5 + (y[3]-UB)/R6 + (1.0-ALPHA)*FAC1 f[4] = (y[4]-UB)/R4 + ALPHA*FAC2 f[5] = y[5]/R3 - FAC2 f[6] = y[6]/R1 + (y[6]-UB)/R2 + (1.0-ALPHA)*FAC2 f[7] = (y[7] - UET) / R0 } mpi.AllReduceSum(f, w) return nil } // Jacobian of the amplifier problem jac := func(dfdy *la.Triplet, dx, x float64, y []float64, args ...interface{}) error { FAC14 := BETA * math.Exp((y[3]-y[2])/UF) / UF FAC27 := BETA * math.Exp((y[6]-y[5])/UF) / UF if dfdy.Max() == 0 { dfdy.Init(8, 8, 16) } NU := 2 dfdy.Start() switch mpi.Rank() { case 0: dfdy.Put(2+0-NU, 0, 1.0/R9) dfdy.Put(2+1-NU, 1, 1.0/R8) dfdy.Put(1+2-NU, 2, -ALPHA*FAC14) dfdy.Put(0+3-NU, 3, ALPHA*FAC14) dfdy.Put(2+2-NU, 2, 1.0/R7+FAC14) case 1: dfdy.Put(1+3-NU, 3, -FAC14) dfdy.Put(2+3-NU, 3, 1.0/R5+1.0/R6+(1.0-ALPHA)*FAC14) dfdy.Put(3+2-NU, 2, -(1.0-ALPHA)*FAC14) dfdy.Put(2+4-NU, 4, 1.0/R4) dfdy.Put(1+5-NU, 5, -ALPHA*FAC27) case 2: dfdy.Put(0+6-NU, 6, ALPHA*FAC27) dfdy.Put(2+5-NU, 5, 1.0/R3+FAC27) dfdy.Put(1+6-NU, 6, -FAC27) dfdy.Put(2+6-NU, 6, 1.0/R1+1.0/R2+(1.0-ALPHA)*FAC27) dfdy.Put(3+5-NU, 5, -(1.0-ALPHA)*FAC27) dfdy.Put(2+7-NU, 7, 1.0/R0) } return nil } // matrix "M" c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6 var M la.Triplet M.Init(8, 8, 14) M.Start() NU := 1 switch mpi.Rank() { case 0: M.Put(1+0-NU, 0, -c5) M.Put(0+1-NU, 1, c5) M.Put(2+0-NU, 0, c5) M.Put(1+1-NU, 1, -c5) M.Put(1+2-NU, 2, -c4) M.Put(1+3-NU, 3, -c3) case 1: M.Put(0+4-NU, 4, c3) M.Put(2+3-NU, 3, c3) M.Put(1+4-NU, 4, -c3) case 2: M.Put(1+5-NU, 5, -c2) M.Put(1+6-NU, 6, -c1) M.Put(0+7-NU, 7, c1) M.Put(2+6-NU, 6, c1) M.Put(1+7-NU, 7, -c1) } // flags silent := false fixstp := false //method := "Dopri5" method := "Radau5" ndim := len(ya) numjac := false // structure to hold numerical results res := ode.Results{Method: method} // ODE solver var osol ode.Solver osol.Pll = true // solve problem if numjac { osol.Init(method, ndim, fcn, nil, &M, ode.SimpleOutput, silent) } else { osol.Init(method, ndim, fcn, jac, &M, ode.SimpleOutput, silent) } osol.IniH = 1.0e-6 // initial step size // set tolerances atol, rtol := 1e-11, 1e-5 osol.SetTol(atol, rtol) // run t0 := time.Now() if fixstp { osol.Solve(ya, xa, xb, 0.01, fixstp, &res) } else { osol.Solve(ya, xa, xb, xb-xa, fixstp, &res) } // plot if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) plt.SetForEps(2.0, 400) args := "'b-', marker='.', lw=1, clip_on=0" ode.Plot("/tmp/gosl/ode", "hwamplifier_mpi.eps", &res, nil, xa, xb, "", args, func() { _, T, err := io.ReadTable("data/radau5_hwamplifier.dat") if err != nil { chk.Panic("%v", err) } for j := 0; j < ndim; j++ { plt.Subplot(ndim+1, 1, j+1) plt.Plot(T["x"], T[io.Sf("y%d", j)], "'k+',label='reference',ms=10") } }) } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("ode02: Hairer-Wanner VII-p5 Eq.(1.5) Van der Pol's Equation") } if mpi.Size() != 2 { chk.Panic(">> error: this test requires 2 MPI processors\n") return } eps := 1.0e-6 w := make([]float64, 2) // workspace fcn := func(f []float64, dx, x float64, y []float64, args ...interface{}) error { f[0], f[1] = 0, 0 switch mpi.Rank() { case 0: f[0] = y[1] case 1: f[1] = ((1.0-y[0]*y[0])*y[1] - y[0]) / eps } // join all f mpi.AllReduceSum(f, w) return nil } jac := func(dfdy *la.Triplet, dx, x float64, y []float64, args ...interface{}) error { if dfdy.Max() == 0 { dfdy.Init(2, 2, 4) } dfdy.Start() if false { // per column switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) case 1: dfdy.Put(0, 1, 1.0) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } else { // per row switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(0, 1, 1.0) case 1: dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } return nil } // method and flags silent := false fixstp := false //method := "Dopri5" method := "Radau5" numjac := false xa, xb := 0.0, 2.0 ya := []float64{2.0, -0.6} ndim := len(ya) // structure to hold numerical results res := ode.Results{Method: method} // allocate ODE object var o ode.Solver o.Distr = true if numjac { o.Init(method, ndim, fcn, nil, nil, ode.SimpleOutput, silent) } else { o.Init(method, ndim, fcn, jac, nil, ode.SimpleOutput, silent) } // tolerances and initial step size rtol := 1e-4 atol := rtol o.IniH = 1.0e-4 o.SetTol(atol, rtol) //o.NmaxSS = 2 // solve problem y := make([]float64, ndim) copy(y, ya) t0 := time.Now() if fixstp { o.Solve(y, xa, xb, 0.05, fixstp, &res) } else { o.Solve(y, xa, xb, xb-xa, fixstp, &res) } // plot if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) plt.SetForEps(1.5, 400) args := "'b-', marker='.', lw=1, ms=4, clip_on=0" ode.Plot("/tmp/gosl/ode", "vdpolA_mpi.eps", &res, nil, xa, xb, "", args, func() { _, T, err := io.ReadTable("data/vdpol_radau5_for.dat") if err != nil { chk.Panic("%v", err) } plt.Subplot(3, 1, 1) plt.Plot(T["x"], T["y0"], "'k+',label='reference',ms=7") plt.Subplot(3, 1, 2) plt.Plot(T["x"], T["y1"], "'k+',label='reference',ms=7") }) } }