// SolveC solves the linear Complex system A.x = b // NOTES: // 1) sum_b_to_root is a flag for MUMPS; it tells Solve to sum the values in 'b' arrays to the root processor func (o *LinSolMumps) SolveC(xR, xC, bR, bC []float64, sum_b_to_root bool) (err error) { // check if !o.cmplx { return chk.Err(_linsol_mumps_err11) } // start time if o.ton { o.tini = time.Now() } // message if o.verb { io.Pfgreen("\n . . . . . . . . . . . . . . LinSolMumps.SolveC . . . . . . . . . . . . . . . \n\n") } // MUMPS: set RHS in processor # 0 if sum_b_to_root { mpi.SumToRoot(xR, bR) mpi.SumToRoot(xC, bC) // join complex values if mpi.Rank() == 0 { for i := 0; i < len(xR); i++ { o.xRC[i*2], o.xRC[i*2+1] = xR[i], xC[i] } } } else { // join complex values if mpi.Rank() == 0 { for i := 0; i < len(xR); i++ { o.xRC[i*2], o.xRC[i*2+1] = bR[i], bC[i] } } } // MUMPS: solve o.mz.job = 3 // solution code C.zmumps_c(&o.mz) // solve if o.mz.info[1-1] < 0 { return chk.Err(_linsol_mumps_err12, mumps_error(o.mz.info[1-1], o.mz.info[2-1])) } // MUMPS: split complex values if mpi.Rank() == 0 { for i := 0; i < len(xR); i++ { xR[i], xC[i] = o.xRC[i*2], o.xRC[i*2+1] } } // MUMPS: broadcast from root mpi.BcastFromRoot(xR) mpi.BcastFromRoot(xC) // duration if o.ton { io.Pfcyan("%s: Time spent in LinSolMumps.Solve = %v\n", o.name, time.Now().Sub(o.tini)) } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { io.PfYel("\nTest MPI 03\n") } if mpi.Size() != 3 { chk.Panic("this test needs 3 processors") } x := []int{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1} n := len(x) id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*n)/sz, ((id+1)*n)/sz for i := start; i < endp1; i++ { x[i] = i } //io.Pforan("x = %v\n", x) // IntAllReduceMax w := make([]int, n) mpi.IntAllReduceMax(x, w) var tst testing.T chk.Ints(&tst, fmt.Sprintf("IntAllReduceMax: x @ proc # %d", id), x, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) //io.Pfred("x = %v\n", x) }
func main() { // catch errors defer func() { if err := recover(); err != nil { if mpi.Rank() == 0 { chk.Verbose = true for i := 8; i > 3; i-- { chk.CallerInfo(i) } io.PfRed("ERROR: %v\n", err) } } mpi.Stop(false) }() mpi.Start(false) // default input parameters // read input parameters fnamepath, _ := io.ArgToFilename(0, "", ".sim", true) verbose := io.ArgToBool(1, true) erasePrev := io.ArgToBool(2, true) saveSummary := io.ArgToBool(3, true) allowParallel := io.ArgToBool(4, true) alias := io.ArgToString(5, "") // message if mpi.Rank() == 0 && verbose { io.PfWhite("\nGofem v3 -- Go Finite Element Method\n\n") io.Pf("Copyright 2015 Dorival Pedroso and Raul Durand. All rights reserved.\n") io.Pf("Use of this source code is governed by a BSD-style\n") io.Pf("license that can be found in the LICENSE file.\n\n") io.Pf("\n%v\n", io.ArgsTable( "filename path", "fnamepath", fnamepath, "show messages", "verbose", verbose, "erase previous results", "erasePrev", erasePrev, "save summary", "saveSummary", saveSummary, "allow parallel run", "allowParallel", allowParallel, "word to add to results", "alias", alias, )) } // profiling? defer utl.DoProf(false)() // analysis data readSummary := false analysis := fem.NewFEM(fnamepath, alias, erasePrev, saveSummary, readSummary, allowParallel, verbose, 0) // run simulation err := analysis.Run() if err != nil { chk.Panic("Run failed:\n%v", err) } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test SumToRoot 01") } M := [][]float64{ {1000, 1000, 1000, 1011, 1021, 1000}, {1000, 1000, 1000, 1012, 1022, 1000}, {1000, 1000, 1000, 1013, 1023, 1000}, {1011, 1012, 1013, 1000, 1000, 1000}, {1021, 1022, 1023, 1000, 1000, 1000}, {1000, 1000, 1000, 1000, 1000, 1000}, } id, sz, m := mpi.Rank(), mpi.Size(), len(M) start, endp1 := (id*m)/sz, ((id+1)*m)/sz if sz > 6 { chk.Panic("this test works with at most 6 processors") } var J la.Triplet J.Init(m, m, m*m) for i := start; i < endp1; i++ { for j := 0; j < m; j++ { J.Put(i, j, M[i][j]) } } la.PrintMat(fmt.Sprintf("J @ proc # %d", id), J.ToMatrix(nil).ToDense(), "%10.1f", false) la.SpTriSumToRoot(&J) var tst testing.T if mpi.Rank() == 0 { chk.Matrix(&tst, "J @ proc 0", 1.0e-17, J.ToMatrix(nil).ToDense(), [][]float64{ {1000, 1000, 1000, 1011, 1021, 1000}, {1000, 1000, 1000, 1012, 1022, 1000}, {1000, 1000, 1000, 1013, 1023, 1000}, {1011, 1012, 1013, 1000, 1000, 1000}, {1021, 1022, 1023, 1000, 1000, 1000}, {1000, 1000, 1000, 1000, 1000, 1000}, }) } }
// SolveR solves the linear Real system A.x = b // NOTES: // 1) sum_b_to_root is a flag for MUMPS; it tells Solve to sum the values in 'b' arrays to the root processor func (o *LinSolMumps) SolveR(xR, bR []float64, sum_b_to_root bool) (err error) { // check if !o.is_initialised { return chk.Err("linear solver must be initialised first\n") } if o.cmplx { return chk.Err(_linsol_mumps_err09) } // start time if o.ton { o.tini = time.Now() } // message if o.verb { io.Pfgreen("\n . . . . . . . . . . . . . . LinSolMumps.SolveR . . . . . . . . . . . . . . . \n\n") } // MUMPS: set RHS in processor # 0 if sum_b_to_root { mpi.SumToRoot(xR, bR) } else { if mpi.Rank() == 0 { copy(xR, bR) // x := b } } // only proc # 0 needs the RHS if mpi.Rank() == 0 { o.m.rhs = (*C.double)(unsafe.Pointer(&xR[0])) } // MUMPS: solve o.m.job = 3 // solution code C.dmumps_c(&o.m) // solve if o.m.info[1-1] < 0 { return chk.Err(_linsol_mumps_err10, mumps_error(o.m.info[1-1], o.m.info[2-1])) } mpi.BcastFromRoot(xR) // broadcast from root // duration if o.ton { io.Pfcyan("%s: Time spent in LinSolMumps.Solve = %v\n", o.name, time.Now().Sub(o.tini)) } return }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("TestJacobian 02b (MPI)") } if mpi.Size() > 6 { io.Pf("this tests works with 6 or less MPI processors\n") return } ffcn := func(fx, x []float64) error { fx[0] = 2.0*x[0] - x[1] + sin(x[2]) - cos(x[3]) - x[5]*x[5] - 1.0 // 0 fx[1] = -x[0] + 2.0*x[1] + cos(x[2]) - sin(x[3]) + x[5] - 1.0 // 1 fx[2] = x[0] + 3.0*x[1] + sin(x[3]) - cos(x[4]) - x[5]*x[5] - 1.0 // 2 fx[3] = 2.0*x[0] + 4.0*x[1] + cos(x[3]) - cos(x[4]) + x[5] - 1.0 // 3 fx[4] = x[0] + 5.0*x[1] - sin(x[2]) + sin(x[4]) - x[5]*x[5]*x[5] - 1.0 // 4 fx[5] = x[0] + 6.0*x[1] - cos(x[2]) + cos(x[4]) + x[5] - 1.0 // 5 return nil } Jfcn := func(dfdx *la.Triplet, x []float64) error { dfdx.Start() J := [][]float64{ {2.0, -1.0, cos(x[2]), sin(x[3]), 0.0, -2.0 * x[5]}, {-1.0, 2.0, -sin(x[2]), -cos(x[3]), 0.0, 1.0}, {1.0, 3.0, 0.0, cos(x[3]), sin(x[4]), -2.0 * x[5]}, {2.0, 4.0, 0.0, -sin(x[3]), sin(x[4]), 1.0}, {1.0, 5.0, -cos(x[2]), 0.0, cos(x[4]), -3.0 * x[5] * x[5]}, {1.0, 6.0, sin(x[2]), 0.0, -sin(x[4]), 1.0}, } id, sz, ndim := mpi.Rank(), mpi.Size(), 6 start, endp1 := (id*ndim)/sz, ((id+1)*ndim)/sz for col := 0; col < 6; col++ { for row := start; row < endp1; row++ { dfdx.Put(row, col, J[row][col]) } } //la.PrintMat(fmt.Sprintf("J @ %d",mpi.Rank()), dfdx.ToMatrix(nil).ToDense(), "%12.6f", false) return nil } x := []float64{5.0, 5.0, pi, pi, pi, 5.0} var tst testing.T num.CompareJac(&tst, ffcn, Jfcn, x, 1e-6, true) }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() myrank := mpi.Rank() if myrank == 0 { chk.PrintTitle("Test MUMPS Sol 05") } ndim := 10 id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*ndim)/sz, ((id+1)*ndim)/sz if mpi.Size() > ndim { chk.Panic("the number of processors must be smaller than or equal to %d", ndim) } n := 10 b := make([]complex128, n) x_correct := make([]complex128, n) // Let exact solution = 1 + 0.5i for i := 0; i < ndim; i++ { x_correct[i] = complex(float64(i+1), float64(i+1)/10.0) } var t la.TripletC t.Init(ndim, ndim, ndim, true) // assemble a and b for i := start; i < endp1; i++ { // Some very fake diagonals. Should take exactly 20 GMRES steps ar := 10.0 + float64(i)/(float64(ndim)/10.0) ac := 10.0 - float64(i)/(float64(ndim)/10.0) t.Put(i, i, ar, ac) // Generate RHS to match exact solution b[i] = complex(ar*real(x_correct[i])-ac*imag(x_correct[i]), ar*imag(x_correct[i])+ac*real(x_correct[i])) } sum_b_to_root := true la.RunMumpsTestC(&t, 1e-14, b, x_correct, sum_b_to_root) }
func main() { // catch errors var tst testing.T defer func() { if mpi.Rank() == 0 { if err := recover(); err != nil { io.PfRed("ERROR: %v\n", err) } if tst.Failed() { io.PfRed("test failed\n") } } mpi.Stop(false) }() mpi.Start(false) // start global variables and log analysis := fem.NewFEM("data/bh16.sim", "", true, true, false, true, true, 0) // run simulation err := analysis.Run() if err != nil { tst.Error("Run failed\n") return } // check skipK := true tolK := 1e-12 tolu := 1e-15 tols := 1e-12 fem.TestingCompareResultsU(&tst, "data/bh16.sim", "cmp/bh16.cmp", "", tolK, tolu, tols, skipK, true) }
func main() { // catch errors var tst testing.T defer func() { if mpi.Rank() == 0 { if err := recover(); err != nil { io.PfRed("ERROR: %v\n", err) } if tst.Failed() { io.PfRed("test failed\n") } } mpi.Stop(false) }() mpi.Start(false) // start global variables and log analysis := fem.NewFEM("data/p01.sim", "", true, true, false, true, true, 0) // run simulation err := analysis.Run() if err != nil { tst.Error("Run failed\n") return } }
func main() { // catch errors var tst testing.T defer func() { if mpi.Rank() == 0 { if err := recover(); err != nil { io.PfRed("ERROR: %v\n", err) } if tst.Failed() { io.PfRed("test failed\n") } } mpi.Stop(false) }() mpi.Start(false) // start global variables and log if !fem.Start("data/p01.sim", true, true) { tst.Error("Start failed\n") return } // make sure to flush log defer fem.End() // run simulation if !fem.Run() { tst.Error("Run failed\n") return } }
func (o *Solver) init_mpi() { if mpi.IsOn() { o.root = (mpi.Rank() == 0) if mpi.Size() > 1 { o.Distr = true } } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("TestJacobian 01b (MPI)") } if mpi.Size() != 2 { io.Pf("this tests needs MPI 2 processors\n") return } ffcn := func(fx, x []float64) error { fx[0] = math.Pow(x[0], 3.0) + x[1] - 1.0 fx[1] = -x[0] + math.Pow(x[1], 3.0) + 1.0 return nil } Jfcn := func(dfdx *la.Triplet, x []float64) error { dfdx.Start() if false { if mpi.Rank() == 0 { dfdx.Put(0, 0, 3.0*x[0]*x[0]) dfdx.Put(1, 0, -1.0) } else { dfdx.Put(0, 1, 1.0) dfdx.Put(1, 1, 3.0*x[1]*x[1]) } } else { if mpi.Rank() == 0 { dfdx.Put(0, 0, 3.0*x[0]*x[0]) dfdx.Put(0, 1, 1.0) } else { dfdx.Put(1, 0, -1.0) dfdx.Put(1, 1, 3.0*x[1]*x[1]) } } return nil } x := []float64{0.5, 0.5} var tst testing.T num.CompareJacMpi(&tst, ffcn, Jfcn, x, 1e-8, true) }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() myrank := mpi.Rank() if myrank == 0 { chk.PrintTitle("Test MUMPS Sol 01a") } var t la.Triplet switch mpi.Size() { case 1: t.Init(5, 5, 13) t.Put(0, 0, 1.0) t.Put(0, 0, 1.0) t.Put(1, 0, 3.0) t.Put(0, 1, 3.0) t.Put(2, 1, -1.0) t.Put(4, 1, 4.0) t.Put(1, 2, 4.0) t.Put(2, 2, -3.0) t.Put(3, 2, 1.0) t.Put(4, 2, 2.0) t.Put(2, 3, 2.0) t.Put(1, 4, 6.0) t.Put(4, 4, 1.0) case 2: if myrank == 0 { t.Init(5, 5, 6) t.Put(0, 0, 1.0) t.Put(0, 0, 1.0) t.Put(1, 0, 3.0) t.Put(0, 1, 3.0) t.Put(2, 1, -1.0) t.Put(4, 1, 4.0) } else { t.Init(5, 5, 7) t.Put(1, 2, 4.0) t.Put(2, 2, -3.0) t.Put(3, 2, 1.0) t.Put(4, 2, 2.0) t.Put(2, 3, 2.0) t.Put(1, 4, 6.0) t.Put(4, 4, 1.0) } default: chk.Panic("this test needs 1 or 2 procs") } b := []float64{8.0, 45.0, -3.0, 3.0, 19.0} x_correct := []float64{1, 2, 3, 4, 5} sum_b_to_root := false la.RunMumpsTestR(&t, 1e-14, b, x_correct, sum_b_to_root) }
func setslice(x []float64) { switch mpi.Rank() { case 0: copy(x, []float64{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}) case 1: copy(x, []float64{10, 10, 10, 20, 20, 20, 30, 30, 30, 40, 40}) case 2: copy(x, []float64{100, 100, 100, 1000, 1000, 1000, 2000, 2000, 2000, 3000, 3000}) } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() myrank := mpi.Rank() if myrank == 0 { chk.PrintTitle("Test MUMPS Sol 04") } ndim := 10 id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*ndim)/sz, ((id+1)*ndim)/sz if mpi.Size() > ndim { chk.Panic("the number of processors must be smaller than or equal to %d", ndim) } b := make([]complex128, ndim) var t la.TripletC t.Init(ndim, ndim, ndim*ndim, true) for i := start; i < endp1; i++ { j := i if i > 0 { j = i - 1 } for ; j < 10; j++ { val := 10.0 - float64(j) if i > j { val -= 1.0 } t.Put(i, j, val, 0) } b[i] = complex(float64(i+1), 0.0) } x_correct := []complex128{-1, 8, -65, 454, -2725, 13624, -54497, 163490, -326981, 326991} sum_b_to_root := true la.RunMumpsTestC(&t, 1e-4, b, x_correct, sum_b_to_root) }
func RunMumpsTestC(t *TripletC, tol_cmp float64, b, x_correct []complex128, sum_b_to_root bool) { // info symmetric := false verbose := false timing := false // allocate solver lis := GetSolver("mumps") defer lis.Clean() // initialise solver err := lis.InitC(t, symmetric, verbose, timing) if err != nil { chk.Panic("%v", err.Error()) } // factorise err = lis.Fact() if err != nil { chk.Panic("%v", err.Error()) } // solve bR, bC := ComplexToRC(b) xR := make([]float64, len(b)) xC := make([]float64, len(b)) err = lis.SolveC(xR, xC, bR, bC, sum_b_to_root) // x := inv(A) * b if err != nil { chk.Panic("%v", err.Error()) } x := RCtoComplex(xR, xC) if mpi.Rank() == 0 { // output A := t.ToMatrix(nil) io.Pforan("A.x = b\n") PrintMatC("A", A.ToDense(), "(%g+", "%gi) ", false) PrintVecC("x", x, "(%g+", "%gi) ", false) PrintVecC("b", b, "(%g+", "%gi) ", false) // check xR_correct, xC_correct := ComplexToRC(x_correct) errR := VecMaxDiff(xR, xR_correct) if errR > tol_cmp { chk.Panic("test failed: errR = %g", errR) } errC := VecMaxDiff(xC, xC_correct) if errC > tol_cmp { chk.Panic("test failed: errC = %g", errC) } io.Pf("err(xR) = %g [1;32mOK[0m\n", errR) io.Pf("err(xC) = %g [1;32mOK[0m\n", errC) } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { io.PfYel("\nTest MPI 04\n") } for i := 0; i < 60; i++ { time.Sleep(1e9) io.Pf("hello from %v\n", mpi.Rank()) if mpi.Rank() == 2 && i == 3 { io.PfGreen("rank = 3 wants to abort (the following error is OK)\n") mpi.Abort() } } }
// Start initialises 'global' and starts logging func Start(simfilepath string, erasefiles, verbose bool) (startisok bool) { // multiprocessing data Global.Rank = 0 Global.Nproc = 1 Global.Root = true Global.Distr = false if mpi.IsOn() { Global.Rank = mpi.Rank() Global.Nproc = mpi.Size() Global.Root = Global.Rank == 0 Global.Distr = Global.Nproc > 1 } Global.Verbose = verbose if !Global.Root { Global.Verbose = false } Global.WspcStop = make([]int, Global.Nproc) Global.WspcInum = make([]int, Global.Nproc) // simulation and convenience variables dir := filepath.Dir(simfilepath) fn := filepath.Base(simfilepath) Global.Sim = inp.ReadSim(dir, fn, Global.LogPrefix, erasefiles) LogErrCond(Global.Sim == nil, "ReadSim failed\n") if Stop() { return } Global.Ndim = Global.Sim.Ndim Global.Dirout = Global.Sim.Data.DirOut Global.Fnkey = Global.Sim.Data.FnameKey Global.Enc = Global.Sim.Data.Encoder Global.Stat = Global.Sim.Data.Stat Global.LogBcs = Global.Sim.Data.LogBcs Global.Debug = Global.Sim.Data.Debug // fix show residual flag if !Global.Root { Global.Sim.Data.ShowR = false } // auxiliar structures Global.DynCoefs = new(DynCoefs) if !Global.DynCoefs.Init(&Global.Sim.Solver) { return } Global.HydroSt = new(HydroStatic) Global.HydroSt.Init() // success return true }
/* Jacobian ======== Calculates (with N=n-1): df0dx0, df0dx1, df0dx2, ... df0dxN df1dx0, df1dx1, df1dx2, ... df1dxN . . . . . . . . . . . . . dfNdx0, dfNdx1, dfNdx2, ... dfNdxN INPUT: ffcn : f(x) function x : station where dfdx has to be calculated fx : f @ x w : workspace with size == n == len(x) RETURNS: J : dfdx @ x [must be pre-allocated] */ func Jacobian(J *la.Triplet, ffcn Cb_f, x, fx, w []float64, distr bool) (err error) { ndim := len(x) start, endp1 := 0, ndim if distr { id, sz := mpi.Rank(), mpi.Size() start, endp1 = (id*ndim)/sz, ((id+1)*ndim)/sz if J.Max() == 0 { J.Init(ndim, ndim, (endp1-start)*ndim) } } else { if J.Max() == 0 { J.Init(ndim, ndim, ndim*ndim) } } J.Start() // NOTE: cannot split calculation by columns unless the f function is // independently calculated by each MPI processor. // Otherwise, the AllReduce in f calculation would // join pieces of f from different processors calculated for // different x values (δx[col] from different columns). /* for col := start; col < endp1; col++ { xsafe := x[col] delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe))) x[col] = xsafe + delta ffcn(w, x) // fnew io.Pforan("x = %v, f = %v\n", x, w) for row := 0; row < ndim; row++ { J.Put(row, col, (w[row]-fx[row])/delta) } x[col] = xsafe } */ var df float64 for col := 0; col < ndim; col++ { xsafe := x[col] delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe))) x[col] = xsafe + delta err = ffcn(w, x) // w := f(x+δx[col]) if err != nil { return } for row := start; row < endp1; row++ { df = w[row] - fx[row] //if math.Abs(df) > EPS { J.Put(row, col, df/delta) //} } x[col] = xsafe } return }
// InitLogFile initialises logger func InitLogFile(dirout, fnamekey string) (err error) { // create log file var rank int if mpi.IsOn() { rank = mpi.Rank() } LogFile, err = os.Create(io.Sf("%s/%s_p%d.log", dirout, fnamekey, rank)) if err != nil { return } // connect logger to output file log.SetOutput(LogFile) return }
func RunMumpsTestR(t *Triplet, tol_cmp float64, b, x_correct []float64, sum_b_to_root bool) { // info symmetric := false verbose := false timing := false // allocate solver lis := GetSolver("mumps") defer lis.Clean() // initialise solver err := lis.InitR(t, symmetric, verbose, timing) if err != nil { chk.Panic("%v", err.Error()) } // factorise err = lis.Fact() if err != nil { chk.Panic("%v", err.Error()) } // solve x := make([]float64, len(b)) err = lis.SolveR(x, b, sum_b_to_root) // x := inv(A) * b if err != nil { chk.Panic("%v", err.Error()) } if mpi.Rank() == 0 { // output A := t.ToMatrix(nil) io.Pforan("A.x = b\n") PrintMat("A", A.ToDense(), "%5g", false) PrintVec("x", x, "%g ", false) PrintVec("b", b, "%g ", false) // check err := VecMaxDiff(x, x_correct) if err > tol_cmp { chk.Panic("test failed: err = %g", err) } io.Pf("err(x) = %g [1;32mOK[0m\n", err) } }
// SpTriSumToRoot join (MPI) parallel triplets to root (Rank == 0) processor. // NOTE: J in root is also joined into Jroot func SpTriSumToRoot(J *Triplet) { if mpi.Rank() == 0 { for proc := 1; proc < mpi.Size(); proc++ { nnz := mpi.SingleIntRecv(proc) irec := make([]int, nnz) drec := make([]float64, nnz) mpi.IntRecv(irec, proc) J.i = append(J.i, irec...) mpi.IntRecv(irec, proc) J.j = append(J.j, irec...) mpi.DblRecv(drec, proc) J.x = append(J.x, drec...) } J.pos = len(J.x) J.max = J.pos } else { mpi.SingleIntSend(J.max, 0) mpi.IntSend(J.i, 0) mpi.IntSend(J.j, 0) mpi.DblSend(J.x, 0) } }
func main() { // catch errors var tst testing.T defer func() { if mpi.Rank() == 0 { if err := recover(); err != nil { io.PfRed("ERROR: %v\n", err) } if tst.Failed() { io.PfRed("test failed\n") } } mpi.Stop(false) }() mpi.Start(false) // start global variables and log if !fem.Start("data/spo751.sim", true, true) { tst.Error("Start failed\n") return } // make sure to flush log defer fem.End() // run simulation if !fem.Run() { tst.Error("Run failed\n") return } // check skipK := true tolK := 1e-17 tolu := 1e-12 tols := 1e-14 fem.TestingCompareResultsU(&tst, "data/spo751.sim", "cmp/spo751.cmp", tolK, tolu, tols, skipK, true) }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test ODE 02b") io.Pfcyan("Hairer-Wanner VII-p5 Eq.(1.5) Van der Pol's Equation (MPI)\n") } if mpi.Size() != 2 { chk.Panic(">> error: this test requires 2 MPI processors\n") return } eps := 1.0e-6 w := make([]float64, 2) // workspace fcn := func(f []float64, x float64, y []float64, args ...interface{}) error { f[0], f[1] = 0, 0 switch mpi.Rank() { case 0: f[0] = y[1] case 1: f[1] = ((1.0-y[0]*y[0])*y[1] - y[0]) / eps } // join all f mpi.AllReduceSum(f, w) return nil } jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error { if dfdy.Max() == 0 { dfdy.Init(2, 2, 4) } dfdy.Start() if false { // per column switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) case 1: dfdy.Put(0, 1, 1.0) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } else { // per row switch mpi.Rank() { case 0: dfdy.Put(0, 0, 0.0) dfdy.Put(0, 1, 1.0) case 1: dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps) dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps) } } return nil } // data silent := false fixstp := false //method := "Dopri5" method := "Radau5" xa, xb := 0.0, 2.0 ya := []float64{2.0, -0.6} ndim := len(ya) // output var b bytes.Buffer out := func(first bool, dx, x float64, y []float64, args ...interface{}) error { if mpi.Rank() == 0 { if first { fmt.Fprintf(&b, "%23s %23s %23s %23s\n", "dx", "x", "y0", "y1") } fmt.Fprintf(&b, "%23.15E %23.15E %23.15E %23.15E\n", dx, x, y[0], y[1]) } return nil } defer func() { if mpi.Rank() == 0 { extra := "d2 = Read('data/vdpol_radau5_for.dat')\n" + "subplot(3,1,1)\n" + "plot(d2['x'],d2['y0'],'k+',label='res',ms=10)\n" + "subplot(3,1,2)\n" + "plot(d2['x'],d2['y1'],'k+',label='res',ms=10)\n" ode.Plot("/tmp/gosl", "vdpolB", method, &b, []int{0, 1}, ndim, nil, xa, xb, true, false, extra) } }() // one run var o ode.ODE o.Distr = true //numjac := true numjac := false if numjac { o.Init(method, ndim, fcn, nil, nil, out, silent) } else { o.Init(method, ndim, fcn, jac, nil, out, silent) } // tolerances and initial step size rtol := 1e-4 atol := rtol o.SetTol(atol, rtol) o.IniH = 1.0e-4 //o.NmaxSS = 2 y := make([]float64, ndim) copy(y, ya) t0 := time.Now() if fixstp { o.Solve(y, xa, xb, 0.05, fixstp) } else { o.Solve(y, xa, xb, xb-xa, fixstp) } if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) } }
func main() { // catch errors defer func() { if err := recover(); err != nil { if mpi.Rank() == 0 { chk.Verbose = true for i := 8; i > 3; i-- { chk.CallerInfo(i) } io.PfRed("ERROR: %v\n", err) } } mpi.Stop(false) }() mpi.Start(false) // message if mpi.Rank() == 0 { io.PfWhite("\nGofem v3 -- Go Finite Element Method\n\n") io.Pf("Copyright 2015 Dorival Pedroso and Raul Durand. All rights reserved.\n") io.Pf("Use of this source code is governed by a BSD-style\n") io.Pf("license that can be found in the LICENSE file.\n\n") } // simulation filenamepath flag.Parse() var fnamepath string if len(flag.Args()) > 0 { fnamepath = flag.Arg(0) } else { chk.Panic("Please, provide a filename. Ex.: cylinder.sim") } // check extension if io.FnExt(fnamepath) == "" { fnamepath += ".sim" } // other options erasefiles := true verbose := true if len(flag.Args()) > 1 { erasefiles = io.Atob(flag.Arg(1)) } if len(flag.Args()) > 2 { verbose = io.Atob(flag.Arg(2)) } // profiling? defer utl.DoProf(false)() // start global variables and log if !fem.Start(fnamepath, erasefiles, verbose) { chk.Panic("Start failed\n") return } // make sure to flush log defer fem.End() // run simulation if !fem.Run() { io.PfRed("ERROR: cannot run simulation\n") } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { chk.PrintTitle("Test ODE 04b (MPI)") io.Pfcyan("Hairer-Wanner VII-p376 Transistor Amplifier (MPI)\n") io.Pfcyan("(from E Hairer's website, not the system in the book)\n") } if mpi.Size() != 3 { chk.Panic(">> error: this test requires 3 MPI processors\n") return } // RIGHT-HAND SIDE OF THE AMPLIFIER PROBLEM w := make([]float64, 8) // workspace fcn := func(f []float64, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) UET := d.UE * math.Sin(d.W*x) FAC1 := d.BETA * (math.Exp((y[3]-y[2])/d.UF) - 1.0) FAC2 := d.BETA * (math.Exp((y[6]-y[5])/d.UF) - 1.0) la.VecFill(f, 0) switch mpi.Rank() { case 0: f[0] = y[0] / d.R9 case 1: f[1] = (y[1]-d.UB)/d.R8 + d.ALPHA*FAC1 f[2] = y[2]/d.R7 - FAC1 case 2: f[3] = y[3]/d.R5 + (y[3]-d.UB)/d.R6 + (1.0-d.ALPHA)*FAC1 f[4] = (y[4]-d.UB)/d.R4 + d.ALPHA*FAC2 f[5] = y[5]/d.R3 - FAC2 f[6] = y[6]/d.R1 + (y[6]-d.UB)/d.R2 + (1.0-d.ALPHA)*FAC2 f[7] = (y[7] - UET) / d.R0 } mpi.AllReduceSum(f, w) return nil } // JACOBIAN OF THE AMPLIFIER PROBLEM jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error { d := args[0].(*HWtransData) FAC14 := d.BETA * math.Exp((y[3]-y[2])/d.UF) / d.UF FAC27 := d.BETA * math.Exp((y[6]-y[5])/d.UF) / d.UF if dfdy.Max() == 0 { dfdy.Init(8, 8, 16) } NU := 2 dfdy.Start() switch mpi.Rank() { case 0: dfdy.Put(2+0-NU, 0, 1.0/d.R9) dfdy.Put(2+1-NU, 1, 1.0/d.R8) dfdy.Put(1+2-NU, 2, -d.ALPHA*FAC14) dfdy.Put(0+3-NU, 3, d.ALPHA*FAC14) dfdy.Put(2+2-NU, 2, 1.0/d.R7+FAC14) case 1: dfdy.Put(1+3-NU, 3, -FAC14) dfdy.Put(2+3-NU, 3, 1.0/d.R5+1.0/d.R6+(1.0-d.ALPHA)*FAC14) dfdy.Put(3+2-NU, 2, -(1.0-d.ALPHA)*FAC14) dfdy.Put(2+4-NU, 4, 1.0/d.R4) dfdy.Put(1+5-NU, 5, -d.ALPHA*FAC27) case 2: dfdy.Put(0+6-NU, 6, d.ALPHA*FAC27) dfdy.Put(2+5-NU, 5, 1.0/d.R3+FAC27) dfdy.Put(1+6-NU, 6, -FAC27) dfdy.Put(2+6-NU, 6, 1.0/d.R1+1.0/d.R2+(1.0-d.ALPHA)*FAC27) dfdy.Put(3+5-NU, 5, -(1.0-d.ALPHA)*FAC27) dfdy.Put(2+7-NU, 7, 1.0/d.R0) } return nil } // MATRIX "M" c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6 var M la.Triplet M.Init(8, 8, 14) M.Start() NU := 1 switch mpi.Rank() { case 0: M.Put(1+0-NU, 0, -c5) M.Put(0+1-NU, 1, c5) M.Put(2+0-NU, 0, c5) M.Put(1+1-NU, 1, -c5) M.Put(1+2-NU, 2, -c4) M.Put(1+3-NU, 3, -c3) case 1: M.Put(0+4-NU, 4, c3) M.Put(2+3-NU, 3, c3) M.Put(1+4-NU, 4, -c3) case 2: M.Put(1+5-NU, 5, -c2) M.Put(1+6-NU, 6, -c1) M.Put(0+7-NU, 7, c1) M.Put(2+6-NU, 6, c1) M.Put(1+7-NU, 7, -c1) } // WRITE FILE FUNCTION idxstp := 1 var b bytes.Buffer out := func(first bool, dx, x float64, y []float64, args ...interface{}) error { if mpi.Rank() == 0 { if first { fmt.Fprintf(&b, "%6s%23s%23s%23s%23s%23s%23s%23s%23s%23s\n", "ns", "x", "y0", "y1", "y2", "y3", "y4", "y5", "y6", "y7") } fmt.Fprintf(&b, "%6d%23.15E", idxstp, x) for j := 0; j < len(y); j++ { fmt.Fprintf(&b, "%23.15E", y[j]) } fmt.Fprintf(&b, "\n") idxstp += 1 } return nil } defer func() { if mpi.Rank() == 0 { io.WriteFileD("/tmp/gosl", "hwamplifierB.res", &b) } }() // INITIAL DATA D, xa, xb, ya := HWtransIni() // SET ODE SOLVER silent := false fixstp := false //method := "Dopri5" method := "Radau5" ndim := len(ya) //numjac := true numjac := false var osol ode.ODE osol.Pll = true if numjac { osol.Init(method, ndim, fcn, nil, &M, out, silent) } else { osol.Init(method, ndim, fcn, jac, &M, out, silent) } osol.IniH = 1.0e-6 // initial step size // SET TOLERANCES atol, rtol := 1e-11, 1e-5 osol.SetTol(atol, rtol) // RUN t0 := time.Now() if fixstp { osol.Solve(ya, xa, xb, 0.01, fixstp, &D) } else { osol.Solve(ya, xa, xb, xb-xa, fixstp, &D) } if mpi.Rank() == 0 { io.Pfmag("elapsed time = %v\n", time.Now().Sub(t0)) } }
// Init initialises ODE structure with default values and allocate slices func (o *ODE) Init(method string, ndim int, fcn Cb_fcn, jac Cb_jac, M *la.Triplet, out Cb_out, silent bool) { // primary variables o.method = method o.ndim = ndim o.fcn = fcn o.jac = jac o.out = out o.silent = silent o.ZeroTrial = false o.Atol = 1.0e-4 o.Rtol = 1.0e-4 o.IniH = 1.0e-4 o.NmaxIt = 7 o.NmaxSS = 1000 o.Mmin = 0.125 o.Mmax = 5.0 o.Mfac = 0.9 o.PredCtrl = true o.ϵ = 1.0e-16 o.θmax = 1.0e-3 o.C1h = 1.0 o.C2h = 1.2 o.LerrStrat = 3 o.Pll = true o.UseRmsNorm = true o.SetTol(o.Atol, o.Rtol) // derived variables o.root = true if mpi.IsOn() { o.root = (mpi.Rank() == 0) if mpi.Size() > 1 { o.Distr = true } } // M matrix if M != nil { o.mTri = M o.mMat = o.mTri.ToMatrix(nil) o.hasM = true } else { if o.method == "BwEuler" { M = new(la.Triplet) la.SpTriSetDiag(M, o.ndim, 1) o.mTri = M o.mMat = o.mTri.ToMatrix(nil) o.hasM = true } } // method switch method { case "FwEuler": o.step = fweuler_step o.accept = fweuler_accept o.nstg = 1 case "BwEuler": o.step = bweuler_step o.accept = bweuler_accept o.nstg = 1 case "MoEuler": o.step = erk_step o.accept = erk_accept o.nstg = 2 o.erkdat = ERKdat{true, ME2_a, ME2_b, ME2_be, ME2_c} case "Dopri5": o.step = erk_step o.accept = erk_accept o.nstg = 7 o.erkdat = ERKdat{true, DP5_a, DP5_b, DP5_be, DP5_c} case "Radau5": o.step = radau5_step o.accept = radau5_accept o.nstg = 3 default: chk.Panic(_ode_err1, method) } // allocate step variables o.f0 = make([]float64, o.ndim) o.scal = make([]float64, o.ndim) // allocate rk variables o.u = make([]float64, o.nstg) o.v = make([][]float64, o.nstg) o.w = make([][]float64, o.nstg) o.δw = make([][]float64, o.nstg) o.f = make([][]float64, o.nstg) if method == "Radau5" { o.z = make([][]float64, o.nstg) o.ycol = make([][]float64, o.nstg) o.ez = make([]float64, o.ndim) o.lerr = make([]float64, o.ndim) o.rhs = make([]float64, o.ndim) } for i := 0; i < o.nstg; i++ { o.v[i] = make([]float64, o.ndim) o.w[i] = make([]float64, o.ndim) o.δw[i] = make([]float64, o.ndim) o.f[i] = make([]float64, o.ndim) if method == "Radau5" { o.z[i] = make([]float64, o.ndim) o.ycol[i] = make([]float64, o.ndim) } } }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() myrank := mpi.Rank() if myrank == 0 { chk.PrintTitle("Test MUMPS Sol 01b") } var t la.Triplet b := []float64{8.0, 45.0, -3.0, 3.0, 19.0} switch mpi.Size() { case 1: t.Init(5, 5, 13) t.Put(0, 0, 1.0) t.Put(0, 0, 1.0) t.Put(1, 0, 3.0) t.Put(0, 1, 3.0) t.Put(2, 1, -1.0) t.Put(4, 1, 4.0) t.Put(1, 2, 4.0) t.Put(2, 2, -3.0) t.Put(3, 2, 1.0) t.Put(4, 2, 2.0) t.Put(2, 3, 2.0) t.Put(1, 4, 6.0) t.Put(4, 4, 1.0) case 2: la.VecFill(b, 0) if myrank == 0 { t.Init(5, 5, 8) t.Put(0, 0, 1.0) t.Put(0, 0, 1.0) t.Put(1, 0, 3.0) t.Put(0, 1, 3.0) t.Put(2, 1, -1.0) t.Put(4, 1, 1.0) t.Put(4, 1, 1.5) t.Put(4, 1, 1.5) b[0] = 8.0 b[1] = 40.0 b[2] = 1.5 } else { t.Init(5, 5, 8) t.Put(1, 2, 4.0) t.Put(2, 2, -3.0) t.Put(3, 2, 1.0) t.Put(4, 2, 2.0) t.Put(2, 3, 2.0) t.Put(1, 4, 6.0) t.Put(4, 4, 0.5) t.Put(4, 4, 0.5) b[1] = 5.0 b[2] = -4.5 b[3] = 3.0 b[4] = 19.0 } default: chk.Panic("this test needs 1 or 2 procs") } x_correct := []float64{1, 2, 3, 4, 5} sum_b_to_root := true la.RunMumpsTestR(&t, 1e-14, b, x_correct, sum_b_to_root) }
func main() { mpi.Start(false) defer func() { mpi.Stop(false) }() if mpi.Rank() == 0 { io.PfYel("\nTest MPI 01\n") } if mpi.Size() != 3 { chk.Panic("this test needs 3 processors") } n := 11 x := make([]float64, n) id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*n)/sz, ((id+1)*n)/sz for i := start; i < endp1; i++ { x[i] = float64(i) } // Barrier mpi.Barrier() io.Pfgrey("x @ proc # %d = %v\n", id, x) // SumToRoot r := make([]float64, n) mpi.SumToRoot(r, x) var tst testing.T if id == 0 { chk.Vector(&tst, fmt.Sprintf("SumToRoot: r @ proc # %d", id), 1e-17, r, []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) } else { chk.Vector(&tst, fmt.Sprintf("SumToRoot: r @ proc # %d", id), 1e-17, r, make([]float64, n)) } // BcastFromRoot r[0] = 666 mpi.BcastFromRoot(r) chk.Vector(&tst, fmt.Sprintf("BcastFromRoot: r @ proc # %d", id), 1e-17, r, []float64{666, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) // AllReduceSum setslice(x) w := make([]float64, n) mpi.AllReduceSum(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceSum: w @ proc # %d", id), 1e-17, w, []float64{110, 110, 110, 1021, 1021, 1021, 2032, 2032, 2032, 3043, 3043}) // AllReduceSumAdd setslice(x) y := []float64{-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000} mpi.AllReduceSumAdd(y, x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceSumAdd: y @ proc # %d", id), 1e-17, y, []float64{-890, -890, -890, 21, 21, 21, 1032, 1032, 1032, 2043, 2043}) // AllReduceMin setslice(x) mpi.AllReduceMin(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceMin: x @ proc # %d", id), 1e-17, x, []float64{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}) // AllReduceMax setslice(x) mpi.AllReduceMax(x, w) chk.Vector(&tst, fmt.Sprintf("AllReduceMax: x @ proc # %d", id), 1e-17, x, []float64{100, 100, 100, 1000, 1000, 1000, 2000, 2000, 2000, 3000, 3000}) }
// Radau5 step function func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) { // factors α := r5.α_ / o.h β := r5.β_ / o.h γ := r5.γ_ / o.h // Jacobian and decomposition if o.reuseJdec { o.reuseJdec = false } else { // calculate only first Jacobian for all iterations (simple/modified Newton's method) if o.reuseJ { o.reuseJ = false } else if !o.jacIsOK { // Jacobian triplet if o.jac == nil { // numerical //if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . numerical Jacobian . . . < < < < < < < < <\n") } err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) { e = o.fcn(fy, o.h, x0, y, args...) return }, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable } else { // analytical //if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . analytical Jacobian . . . < < < < < < < < <\n") } err = o.jac(&o.dfdyT, o.h, x0, y0, args...) } if err != nil { return } // create M matrix if o.doinit && !o.hasM { o.mTri = new(la.Triplet) if o.Distr { id, sz := mpi.Rank(), mpi.Size() start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz o.mTri.Init(o.ndim, o.ndim, endp1-start) for i := start; i < endp1; i++ { o.mTri.Put(i, i, 1.0) } } else { o.mTri.Init(o.ndim, o.ndim, o.ndim) for i := 0; i < o.ndim; i++ { o.mTri.Put(i, i, 1.0) } } } o.njeval += 1 o.jacIsOK = true } // initialise triplets if o.doinit { o.rctriR = new(la.Triplet) o.rctriC = new(la.TripletC) o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len()) xzmono := o.Distr o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono) } // update triplets la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT) // rctriR := γ*M - dfdy la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy // initialise solver if o.doinit { err = o.lsolR.InitR(o.rctriR, false, false, false) if err != nil { return } err = o.lsolC.InitC(o.rctriC, false, false, false) if err != nil { return } } // perform factorisation o.lsolR.Fact() o.lsolC.Fact() o.ndecomp += 1 } // updated u[i] o.u[0] = x0 + r5.c[0]*o.h o.u[1] = x0 + r5.c[1]*o.h o.u[2] = x0 + r5.c[2]*o.h // (trial/initial) updated z[i] and w[i] if o.first || o.ZeroTrial { for m := 0; m < o.ndim; m++ { o.z[0][m], o.w[0][m] = 0.0, 0.0 o.z[1][m], o.w[1][m] = 0.0, 0.0 o.z[2][m], o.w[2][m] = 0.0, 0.0 } } else { c3q := o.h / o.hprev c1q := r5.μ1 * c3q c2q := r5.μ2 * c3q for m := 0; m < o.ndim; m++ { o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m])) o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m])) o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m])) o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m] o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m] o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m] } } // iterations o.nit = 0 o.η = math.Pow(max(o.η, o.ϵ), 0.8) o.θ = o.θmax o.diverg = false var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64 var it int for it = 0; it < o.NmaxIt; it++ { // max iterations ? o.nit = it + 1 if o.nit > o.nitmax { o.nitmax = o.nit } // evaluate f(x,y) at (u[i],v[i]=y0+z[i]) for i := 0; i < 3; i++ { for m := 0; m < o.ndim; m++ { o.v[i][m] = y0[m] + o.z[i][m] } o.nfeval += 1 err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...) if err != nil { return } } // calc rhs if o.hasM { // using δw as workspace here la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0 la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1 la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2 if o.Distr { mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here } for m := 0; m < o.ndim; m++ { o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m] o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m] o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m] } } else { for m := 0; m < o.ndim; m++ { o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m] o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m] o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m] } } // solve linear system o.nlinsol += 1 var errR, errC error if !o.Distr && o.Pll { wg := new(sync.WaitGroup) wg.Add(2) go func() { errR = o.lsolR.SolveR(o.δw[0], o.v[0], false) wg.Done() }() go func() { errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false) wg.Done() }() wg.Wait() } else { errR = o.lsolR.SolveR(o.δw[0], o.v[0], false) errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false) } // check for errors from linear solution if errR != nil || errC != nil { var errmsg string if errR != nil { errmsg += errR.Error() } if errC != nil { if errR != nil { errmsg += "\n" } errmsg += errC.Error() } err = errors.New(errmsg) return } // update w and z for m := 0; m < o.ndim; m++ { o.w[0][m] += o.δw[0][m] o.w[1][m] += o.δw[1][m] o.w[2][m] += o.δw[2][m] o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m] o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m] o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m] } // rms norm of δw Lδw = 0.0 for m := 0; m < o.ndim; m++ { Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0) } Lδw = math.Sqrt(Lδw / float64(3*o.ndim)) // check convergence if it > 0 { thq = Lδw / oLδw if it == 1 { o.θ = thq } else { o.θ = math.Sqrt(thq * othq) } othq = thq if o.θ < 0.99 { o.η = o.θ / (1.0 - o.θ) iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ) itRerr = iterr / o.fnewt if itRerr >= 1.0 { // diverging qnewt = max(1.0e-4, min(20.0, itRerr)) o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit))) o.diverg = true break } } else { // diverging badly (unexpected step-rejection) o.dvfac = 0.5 o.diverg = true break } } // save old norm oLδw = Lδw // converged if o.η*Lδw < o.fnewt { break } } // did not converge if it == o.NmaxIt-1 { chk.Panic("radau5_step failed with it=%d", it) } // diverging => stop if o.diverg { rerr = 2.0 // must leave state intact, any rerr is OK return } // error estimate if o.LerrStrat == 1 { // simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems) for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m] rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0) } rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10) } else { // common if o.hasM { for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.rhs[m] = o.f0[m] } if o.Distr { la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez) // δw[0] = γ * M * ez (δw[0] is workspace) mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace) } else { la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez } } else { for m := 0; m < o.ndim; m++ { o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m] o.rhs[m] = o.f0[m] + γ*o.ez[m] } } // HW-VII p123 Eq.(8.19) if o.LerrStrat == 2 { o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) // HW-VII p123 Eq.(8.20) } else { o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) if !(rerr < 1.0) { if o.first || o.reject { for m := 0; m < o.ndim; m++ { o.v[0][m] = y0[m] + o.lerr[m] // y0perr } o.nfeval += 1 err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr if err != nil { return } if o.hasM { la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr if o.Distr { la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez) // δw[0] = γ * M * ez (δw[0] is workspace) mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace) } else { la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez } } else { la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez } o.lsolR.SolveR(o.lerr, o.rhs, false) rerr = o.rms_norm(o.lerr) } } } } return }