func IpBmatrix_sparse(B *la.Triplet, ndim, nne int, G [][]float64, radius float64, S []float64, axisym bool) { B.Start() if ndim == 3 { for i := 0; i < nne; i++ { B.Put(0, 0+i*3, G[i][0]) B.Put(1, 1+i*3, G[i][1]) B.Put(2, 2+i*3, G[i][2]) B.Put(3, 0+i*3, G[i][1]/SQ2) B.Put(4, 1+i*3, G[i][2]/SQ2) B.Put(5, 2+i*3, G[i][0]/SQ2) B.Put(3, 1+i*3, G[i][0]/SQ2) B.Put(4, 2+i*3, G[i][1]/SQ2) B.Put(5, 0+i*3, G[i][2]/SQ2) } return } if axisym { for i := 0; i < nne; i++ { B.Put(0, 0+i*2, G[i][0]) B.Put(1, 1+i*2, G[i][1]) B.Put(2, 0+i*2, S[i]/radius) B.Put(3, 0+i*2, G[i][1]/SQ2) B.Put(3, 1+i*2, G[i][0]/SQ2) } return } for i := 0; i < nne; i++ { B.Put(0, 0+i*2, G[i][0]) B.Put(1, 1+i*2, G[i][1]) B.Put(3, 0+i*2, G[i][1]/SQ2) B.Put(3, 1+i*2, G[i][0]/SQ2) } }
/* Jacobian ======== Calculates (with N=n-1): df0dx0, df0dx1, df0dx2, ... df0dxN df1dx0, df1dx1, df1dx2, ... df1dxN . . . . . . . . . . . . . dfNdx0, dfNdx1, dfNdx2, ... dfNdxN INPUT: ffcn : f(x) function x : station where dfdx has to be calculated fx : f @ x w : workspace with size == n == len(x) RETURNS: J : dfdx @ x [must be pre-allocated] */ func Jacobian(J *la.Triplet, ffcn Cb_f, x, fx, w []float64, distr bool) (err error) { ndim := len(x) start, endp1 := 0, ndim if distr { id, sz := mpi.Rank(), mpi.Size() start, endp1 = (id*ndim)/sz, ((id+1)*ndim)/sz if J.Max() == 0 { J.Init(ndim, ndim, (endp1-start)*ndim) } } else { if J.Max() == 0 { J.Init(ndim, ndim, ndim*ndim) } } J.Start() // NOTE: cannot split calculation by columns unless the f function is // independently calculated by each MPI processor. // Otherwise, the AllReduce in f calculation would // join pieces of f from different processors calculated for // different x values (δx[col] from different columns). /* for col := start; col < endp1; col++ { xsafe := x[col] delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe))) x[col] = xsafe + delta ffcn(w, x) // fnew io.Pforan("x = %v, f = %v\n", x, w) for row := 0; row < ndim; row++ { J.Put(row, col, (w[row]-fx[row])/delta) } x[col] = xsafe } */ var df float64 for col := 0; col < ndim; col++ { xsafe := x[col] delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe))) x[col] = xsafe + delta err = ffcn(w, x) // w := f(x+δx[col]) if err != nil { return } for row := start; row < endp1; row++ { df = w[row] - fx[row] //if math.Abs(df) > EPS { J.Put(row, col, df/delta) //} } x[col] = xsafe } return }
func Assemble(K11, K12 *la.Triplet, F1 []float64, src Cb_src, g *Grid2D, e *Equations) { K11.Start() K12.Start() la.VecFill(F1, 0.0) kx, ky := 1.0, 1.0 alp, bet, gam := 2.0*(kx/g.Dxx+ky/g.Dyy), -kx/g.Dxx, -ky/g.Dyy mol := []float64{alp, bet, bet, gam, gam} for i, I := range e.RF1 { col, row := I%g.Nx, I/g.Nx nodes := []int{I, I - 1, I + 1, I - g.Nx, I + g.Nx} // I, left, right, bottom, top if col == 0 { nodes[1] = nodes[2] } if col == g.Nx-1 { nodes[2] = nodes[1] } if row == 0 { nodes[3] = nodes[4] } if row == g.Ny-1 { nodes[4] = nodes[3] } for k, J := range nodes { j1, j2 := e.FR1[J], e.FR2[J] // 1 or 2? if j1 > -1 { // 11 K11.Put(i, j1, mol[k]) } else { // 12 K12.Put(i, j2, mol[k]) } } if src != nil { x := float64(col) * g.Dx y := float64(row) * g.Dy F1[i] += src(x, y) } } }
// Jacobian computes Jacobian (sparse) matrix // Calculates (with N=n-1): // df0dx0, df0dx1, df0dx2, ... df0dxN // df1dx0, df1dx1, df1dx2, ... df1dxN // . . . . . . . . . . . . . // dfNdx0, dfNdx1, dfNdx2, ... dfNdxN // INPUT: // ffcn : f(x) function // x : station where dfdx has to be calculated // fx : f @ x // w : workspace with size == n == len(x) // RETURNS: // J : dfdx @ x [must be pre-allocated] func Jacobian(J *la.Triplet, ffcn Cb_f, x, fx, w []float64) (err error) { ndim := len(x) start, endp1 := 0, ndim if J.Max() == 0 { J.Init(ndim, ndim, ndim*ndim) } J.Start() var df float64 for col := 0; col < ndim; col++ { xsafe := x[col] delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe))) x[col] = xsafe + delta err = ffcn(w, x) // w := f(x+δx[col]) if err != nil { return } for row := start; row < endp1; row++ { df = w[row] - fx[row] J.Put(row, col, df/delta) } x[col] = xsafe } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test ODE 04b (MPI)") io.Pfcyan("Hairer-Wanner VII-p376 Transistor Amplifier (MPI)\n") io.Pfcyan("(from E Hairer's website, not the system in the book)\n") } if mpi.Size() != 3 { chk.Panic(">> error: this test requires 3 MPI processors\n") return } // RIGHT-HAND SIDE OF THE AMPLIFIER PROBLEM w := make([]float64, 8) // workspace fcn := func(f []float64, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) UET := d.UE * math.Sin(d.W*x) FAC1 := d.BETA * (math.Exp((y[3]-y[2])/d.UF) - 1.0) FAC2 := d.BETA * (math.Exp((y[6]-y[5])/d.UF) - 1.0) la.VecFill(f, 0) switch mpi.Rank() { case 0: f[0] = y[0] / d.R9 case 1: f[1] = (y[1]-d.UB)/d.R8 + d.ALPHA*FAC1 f[2] = y[2]/d.R7 - FAC1 case 2: f[3] = y[3]/d.R5 + (y[3]-d.UB)/d.R6 + (1.0-d.ALPHA)*FAC1 f[4] = (y[4]-d.UB)/d.R4 + d.ALPHA*FAC2 f[5] = y[5]/d.R3 - FAC2 f[6] = y[6]/d.R1 + (y[6]-d.UB)/d.R2 + (1.0-d.ALPHA)*FAC2 f[7] = (y[7] - UET) / d.R0 } mpi.AllReduceSum(f, w) return nil } // JACOBIAN OF THE AMPLIFIER PROBLEM jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) FAC14 := d.BETA * math.Exp((y[3]-y[2])/d.UF) / d.UF FAC27 := d.BETA * math.Exp((y[6]-y[5])/d.UF) / d.UF if dfdy.Max() == 0 { dfdy.Init(8, 8, 16) } NU := 2 dfdy.Start() switch mpi.Rank() { case 0: dfdy.Put(2+0-NU, 0, 1.0/d.R9) dfdy.Put(2+1-NU, 1, 1.0/d.R8) dfdy.Put(1+2-NU, 2, -d.ALPHA*FAC14) dfdy.Put(0+3-NU, 3, d.ALPHA*FAC14) dfdy.Put(2+2-NU, 2, 1.0/d.R7+FAC14) case 1: dfdy.Put(1+3-NU, 3, -FAC14) dfdy.Put(2+3-NU, 3, 1.0/d.R5+1.0/d.R6+(1.0-d.ALPHA)*FAC14) dfdy.Put(3+2-NU, 2, -(1.0-d.ALPHA)*FAC14) dfdy.Put(2+4-NU, 4, 1.0/d.R4) dfdy.Put(1+5-NU, 5, -d.ALPHA*FAC27) case 2: dfdy.Put(0+6-NU, 6, d.ALPHA*FAC27) dfdy.Put(2+5-NU, 5, 1.0/d.R3+FAC27) dfdy.Put(1+6-NU, 6, -FAC27) dfdy.Put(2+6-NU, 6, 1.0/d.R1+1.0/d.R2+(1.0-d.ALPHA)*FAC27) dfdy.Put(3+5-NU, 5, -(1.0-d.ALPHA)*FAC27) dfdy.Put(2+7-NU, 7, 1.0/d.R0) } return nil } // MATRIX "M" c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6 var M la.Triplet M.Init(8, 8, 14) M.Start() NU := 1 switch mpi.Rank() { case 0: M.Put(1+0-NU, 0, -c5) M.Put(0+1-NU, 1, c5) M.Put(2+0-NU, 0, c5) M.Put(1+1-NU, 1, -c5) M.Put(1+2-NU, 2, -c4) M.Put(1+3-NU, 3, -c3) case 1: M.Put(0+4-NU, 4, c3) M.Put(2+3-NU, 3, c3) M.Put(1+4-NU, 4, -c3) case 2: M.Put(1+5-NU, 5, -c2) M.Put(1+6-NU, 6, -c1) M.Put(0+7-NU, 7, c1) M.Put(2+6-NU, 6, c1) M.Put(1+7-NU, 7, -c1) } // WRITE FILE FUNCTION idxstp := 1 var b bytes.Buffer out := func(first bool, dx, x float64, y []float64, args ...interface{}) error { if mpi.Rank() == 0 { if first { fmt.Fprintf(&b, "%6s%23s%23s%23s%23s%23s%23s%23s%23s%23s\n", "ns", "x", "y0", "y1", "y2", "y3", "y4", "y5", "y6", "y7") } fmt.Fprintf(&b, "%6d%23.15E", idxstp, x) for j := 0; j < len(y); j++ { fmt.Fprintf(&b, "%23.15E", y[j]) } fmt.Fprintf(&b, "\n") idxstp += 1 } return nil } defer func() { if mpi.Rank() == 0 { io.WriteFileD("/tmp/gosl", "hwamplifierB.res", &b) } }() // INITIAL DATA D, xa, xb, ya := HWtransIni() // SET ODE SOLVER silent := false fixstp := false //method := "Dopri5" method := "Radau5" ndim := len(ya) //numjac := true numjac := false var osol ode.ODE osol.Pll = true if numjac { osol.Init(method, ndim, fcn, nil, &M, out, silent) } else { osol.Init(method, ndim, fcn, jac, &M, out, silent) } osol.IniH = 1.0e-6 // initial step size // SET TOLERANCES atol, rtol := 1e-11, 1e-5 osol.SetTol(atol, rtol) // RUN t0 := time.Now() if fixstp { osol.Solve(ya, xa, xb, 0.01, fixstp, &D) } else { osol.Solve(ya, xa, xb, xb-xa, fixstp, &D) } if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("ode04: Hairer-Wanner VII-p376 Transistor Amplifier\n") } if mpi.Size() != 3 { chk.Panic(">> error: this test requires 3 MPI processors\n") return } // data UE, UB, UF, ALPHA, BETA := 0.1, 6.0, 0.026, 0.99, 1.0e-6 R0, R1, R2, R3, R4, R5 := 1000.0, 9000.0, 9000.0, 9000.0, 9000.0, 9000.0 R6, R7, R8, R9 := 9000.0, 9000.0, 9000.0, 9000.0 W := 2.0 * 3.141592654 * 100.0 // initial values xa := 0.0 ya := []float64{0.0, UB, UB / (R6/R5 + 1.0), UB / (R6/R5 + 1.0), UB, UB / (R2/R1 + 1.0), UB / (R2/R1 + 1.0), 0.0} // endpoint of integration xb := 0.05 //xb = 0.0123 // OK //xb = 0.01235 // !OK // right-hand side of the amplifier problem w := make([]float64, 8) // workspace fcn := func(f []float64, dx, x float64, y []float64, args ...interface{}) error { UET := UE * math.Sin(W*x) FAC1 := BETA * (math.Exp((y[3]-y[2])/UF) - 1.0) FAC2 := BETA * (math.Exp((y[6]-y[5])/UF) - 1.0) la.VecFill(f, 0) switch mpi.Rank() { case 0: f[0] = y[0] / R9 case 1: f[1] = (y[1]-UB)/R8 + ALPHA*FAC1 f[2] = y[2]/R7 - FAC1 case 2: f[3] = y[3]/R5 + (y[3]-UB)/R6 + (1.0-ALPHA)*FAC1 f[4] = (y[4]-UB)/R4 + ALPHA*FAC2 f[5] = y[5]/R3 - FAC2 f[6] = y[6]/R1 + (y[6]-UB)/R2 + (1.0-ALPHA)*FAC2 f[7] = (y[7] - UET) / R0 } mpi.AllReduceSum(f, w) return nil } // Jacobian of the amplifier problem jac := func(dfdy *la.Triplet, dx, x float64, y []float64, args ...interface{}) error { FAC14 := BETA * math.Exp((y[3]-y[2])/UF) / UF FAC27 := BETA * math.Exp((y[6]-y[5])/UF) / UF if dfdy.Max() == 0 { dfdy.Init(8, 8, 16) } NU := 2 dfdy.Start() switch mpi.Rank() { case 0: dfdy.Put(2+0-NU, 0, 1.0/R9) dfdy.Put(2+1-NU, 1, 1.0/R8) dfdy.Put(1+2-NU, 2, -ALPHA*FAC14) dfdy.Put(0+3-NU, 3, ALPHA*FAC14) dfdy.Put(2+2-NU, 2, 1.0/R7+FAC14) case 1: dfdy.Put(1+3-NU, 3, -FAC14) dfdy.Put(2+3-NU, 3, 1.0/R5+1.0/R6+(1.0-ALPHA)*FAC14) dfdy.Put(3+2-NU, 2, -(1.0-ALPHA)*FAC14) dfdy.Put(2+4-NU, 4, 1.0/R4) dfdy.Put(1+5-NU, 5, -ALPHA*FAC27) case 2: dfdy.Put(0+6-NU, 6, ALPHA*FAC27) dfdy.Put(2+5-NU, 5, 1.0/R3+FAC27) dfdy.Put(1+6-NU, 6, -FAC27) dfdy.Put(2+6-NU, 6, 1.0/R1+1.0/R2+(1.0-ALPHA)*FAC27) dfdy.Put(3+5-NU, 5, -(1.0-ALPHA)*FAC27) dfdy.Put(2+7-NU, 7, 1.0/R0) } return nil } // matrix "M" c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6 var M la.Triplet M.Init(8, 8, 14) M.Start() NU := 1 switch mpi.Rank() { case 0: M.Put(1+0-NU, 0, -c5) M.Put(0+1-NU, 1, c5) M.Put(2+0-NU, 0, c5) M.Put(1+1-NU, 1, -c5) M.Put(1+2-NU, 2, -c4) M.Put(1+3-NU, 3, -c3) case 1: M.Put(0+4-NU, 4, c3) M.Put(2+3-NU, 3, c3) M.Put(1+4-NU, 4, -c3) case 2: M.Put(1+5-NU, 5, -c2) M.Put(1+6-NU, 6, -c1) M.Put(0+7-NU, 7, c1) M.Put(2+6-NU, 6, c1) M.Put(1+7-NU, 7, -c1) } // flags silent := false fixstp := false //method := "Dopri5" method := "Radau5" ndim := len(ya) numjac := false // structure to hold numerical results res := ode.Results{Method: method} // ODE solver var osol ode.Solver osol.Pll = true // solve problem if numjac { osol.Init(method, ndim, fcn, nil, &M, ode.SimpleOutput, silent) } else { osol.Init(method, ndim, fcn, jac, &M, ode.SimpleOutput, silent) } osol.IniH = 1.0e-6 // initial step size // set tolerances atol, rtol := 1e-11, 1e-5 osol.SetTol(atol, rtol) // run t0 := time.Now() if fixstp { osol.Solve(ya, xa, xb, 0.01, fixstp, &res) } else { osol.Solve(ya, xa, xb, xb-xa, fixstp, &res) } // plot if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) plt.SetForEps(2.0, 400) args := "'b-', marker='.', lw=1, clip_on=0" ode.Plot("/tmp/gosl/ode", "hwamplifier_mpi.eps", &res, nil, xa, xb, "", args, func() { _, T, err := io.ReadTable("data/radau5_hwamplifier.dat") if err != nil { chk.Panic("%v", err) } for j := 0; j < ndim; j++ { plt.Subplot(ndim+1, 1, j+1) plt.Plot(T["x"], T[io.Sf("y%d", j)], "'k+',label='reference',ms=10") } }) } }