示例#1
0
// SolveC solves the complex linear system A.x = b, where x and b are given
// by separate arrays holding their real (xR, bR) and imaginary (xC, bC) parts.
//  NOTES:
//    1) sum_b_to_root is a flag for MUMPS; when true, the values in the 'b' arrays
//       are summed to the root processor (into xR and xC) before solving
//    2) the solution is written to xR and xC, which are broadcast from the root processor
func (o *LinSolMumps) SolveC(xR, xC, bR, bC []float64, sum_b_to_root bool) (err error) {

	// this method requires the complex flag to be set
	if !o.cmplx {
		return chk.Err(_linsol_mumps_err11)
	}

	// start time
	if o.ton {
		o.tini = time.Now()
	}

	// message
	if o.verb {
		io.Pfgreen("\n . . . . . . . . . . . . . . LinSolMumps.SolveC . . . . . . . . . . . . . . . \n\n")
	}

	// MUMPS: select the arrays holding the RHS; after summing to root, the
	// summed values live in xR and xC, otherwise bR and bC are used directly
	rhsR, rhsC := bR, bC
	if sum_b_to_root {
		mpi.SumToRoot(xR, bR)
		mpi.SumToRoot(xC, bC)
		rhsR, rhsC = xR, xC
	}

	// MUMPS: set RHS in processor # 0 by interleaving real and imaginary parts
	if mpi.Rank() == 0 {
		for i := range xR {
			o.xRC[2*i] = rhsR[i]
			o.xRC[2*i+1] = rhsC[i]
		}
	}

	// MUMPS: solve
	o.mz.job = 3 // solution code
	C.zmumps_c(&o.mz)
	if code := o.mz.info[0]; code < 0 { // info[0] and info[1] are passed on to mumps_error
		return chk.Err(_linsol_mumps_err12, mumps_error(code, o.mz.info[1]))
	}

	// MUMPS: split the interleaved values back into real and imaginary parts
	if mpi.Rank() == 0 {
		for i := range xR {
			xR[i] = o.xRC[2*i]
			xC[i] = o.xRC[2*i+1]
		}
	}

	// MUMPS: broadcast from root
	mpi.BcastFromRoot(xR)
	mpi.BcastFromRoot(xC)

	// duration
	if o.ton {
		io.Pfcyan("%s: Time spent in LinSolMumps.Solve = %v\n", o.name, time.Since(o.tini))
	}
	return
}
示例#2
0
// main checks mpi.IntAllReduceMax with exactly 3 processors.
func main() {

	// MPI is active for the whole duration of main
	mpi.Start(false)
	defer mpi.Stop(false)

	id, sz := mpi.Rank(), mpi.Size()
	if id == 0 {
		io.PfYel("\nTest MPI 03\n")
	}
	if sz != 3 {
		chk.Panic("this test needs 3 processors")
	}

	// every entry starts at -1; each processor then overwrites only its own
	// contiguous share of the entries with the entry's index
	n := 11
	x := make([]int, n)
	for i := range x {
		x[i] = -1
	}
	for i := (id * n) / sz; i < ((id+1)*n)/sz; i++ {
		x[i] = i
	}

	// IntAllReduceMax (w is passed as the second array)
	w := make([]int, n)
	mpi.IntAllReduceMax(x, w)

	// afterwards every processor must hold all indices in x
	var tst testing.T
	label := fmt.Sprintf("IntAllReduceMax: x @ proc # %d", id)
	chk.Ints(&tst, label, x, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
}
示例#3
0
// main runs a finite element simulation described by a .sim file, with the
// options read from the command line arguments.
func main() {

	// catch errors: a panic is reported by the root processor only, but
	// every processor stops MPI on the way out
	defer func() {
		if r := recover(); r != nil {
			if mpi.Rank() == 0 {
				chk.Verbose = true
				for depth := 8; depth > 3; depth-- {
					chk.CallerInfo(depth)
				}
				io.PfRed("ERROR: %v\n", r)
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// read input parameters (argument positions 0 to 5, with defaults)
	fnamepath, _ := io.ArgToFilename(0, "", ".sim", true)
	var (
		verbose       = io.ArgToBool(1, true)
		erasePrev     = io.ArgToBool(2, true)
		saveSummary   = io.ArgToBool(3, true)
		allowParallel = io.ArgToBool(4, true)
		alias         = io.ArgToString(5, "")
	)

	// message (root only)
	if mpi.Rank() == 0 && verbose {
		io.PfWhite("\nGofem v3 -- Go Finite Element Method\n\n")
		io.Pf("Copyright 2015 Dorival Pedroso and Raul Durand. All rights reserved.\n")
		io.Pf("Use of this source code is governed by a BSD-style\n")
		io.Pf("license that can be found in the LICENSE file.\n\n")

		table := io.ArgsTable(
			"filename path", "fnamepath", fnamepath,
			"show messages", "verbose", verbose,
			"erase previous results", "erasePrev", erasePrev,
			"save summary", "saveSummary", saveSummary,
			"allow parallel run", "allowParallel", allowParallel,
			"word to add to results", "alias", alias,
		)
		io.Pf("\n%v\n", table)
	}

	// profiling?
	defer utl.DoProf(false)()

	// analysis data
	const readSummary = false
	analysis := fem.NewFEM(fnamepath, alias, erasePrev, saveSummary, readSummary, allowParallel, verbose, 0)

	// run simulation; a failure becomes a panic handled by the deferred function above
	if err := analysis.Run(); err != nil {
		chk.Panic("Run failed:\n%v", err)
	}
}
示例#4
0
// main checks la.SpTriSumToRoot: each processor assembles some rows of a
// 6x6 matrix and the root processor must end up with the whole matrix.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	id, sz := mpi.Rank(), mpi.Size()
	if id == 0 {
		chk.PrintTitle("Test SumToRoot 01")
	}

	// full matrix; also the expected result on the root processor
	M := [][]float64{
		{1000, 1000, 1000, 1011, 1021, 1000},
		{1000, 1000, 1000, 1012, 1022, 1000},
		{1000, 1000, 1000, 1013, 1023, 1000},
		{1011, 1012, 1013, 1000, 1000, 1000},
		{1021, 1022, 1023, 1000, 1000, 1000},
		{1000, 1000, 1000, 1000, 1000, 1000},
	}
	m := len(M)

	if sz > 6 {
		chk.Panic("this test works with at most 6 processors")
	}

	// rows [start, endp1) are assembled by this processor
	start, endp1 := (id*m)/sz, ((id+1)*m)/sz
	var J la.Triplet
	J.Init(m, m, m*m)
	for i := start; i < endp1; i++ {
		for j, val := range M[i] {
			J.Put(i, j, val)
		}
	}
	la.PrintMat(fmt.Sprintf("J @ proc # %d", id), J.ToMatrix(nil).ToDense(), "%10.1f", false)

	// join all pieces in the root processor and compare with the full matrix
	la.SpTriSumToRoot(&J)
	var tst testing.T
	if id == 0 {
		chk.Matrix(&tst, "J @ proc 0", 1.0e-17, J.ToMatrix(nil).ToDense(), M)
	}
}
示例#5
0
// SolveR solves the real linear system A.x = b
//  NOTES:
//    1) sum_b_to_root is a flag for MUMPS; when true, the values in the 'b' array
//       are summed to the root processor (into xR); otherwise bR is copied into xR
//       on the root processor
//    2) on the root processor, xR itself is handed to MUMPS as the RHS array;
//       after the solve, xR is broadcast from root
func (o *LinSolMumps) SolveR(xR, bR []float64, sum_b_to_root bool) (err error) {

	// check
	switch {
	case !o.is_initialised:
		return chk.Err("linear solver must be initialised first\n")
	case o.cmplx:
		return chk.Err(_linsol_mumps_err09)
	}

	// start time
	if o.ton {
		o.tini = time.Now()
	}

	// message
	if o.verb {
		io.Pfgreen("\n . . . . . . . . . . . . . . LinSolMumps.SolveR . . . . . . . . . . . . . . . \n\n")
	}

	// MUMPS: set RHS in processor # 0
	if sum_b_to_root {
		mpi.SumToRoot(xR, bR)
	} else if mpi.Rank() == 0 {
		copy(xR, bR) // x := b
	}

	// only proc # 0 needs the RHS; MUMPS receives a pointer to the first entry of xR
	if mpi.Rank() == 0 {
		o.m.rhs = (*C.double)(unsafe.Pointer(&xR[0]))
	}

	// MUMPS: solve
	o.m.job = 3 // solution code
	C.dmumps_c(&o.m)
	if code := o.m.info[0]; code < 0 { // info[0] and info[1] are passed on to mumps_error
		return chk.Err(_linsol_mumps_err10, mumps_error(code, o.m.info[1]))
	}

	// broadcast from root
	mpi.BcastFromRoot(xR)

	// duration
	if o.ton {
		io.Pfcyan("%s: Time spent in LinSolMumps.Solve = %v\n", o.name, time.Since(o.tini))
	}
	return
}
示例#6
0
// main compares an analytical Jacobian, assembled by rows over the MPI
// processors, against the one obtained by num.CompareJac for a system with
// 6 equations.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	if mpi.Rank() == 0 {
		chk.PrintTitle("TestJacobian 02b (MPI)")
	}
	if mpi.Size() > 6 {
		io.Pf("this tests works with 6 or less MPI processors\n")
		return
	}

	// f(x): six equations in six unknowns
	ffcn := func(fx, x []float64) error {
		fx[0] = 2.0*x[0] - x[1] + sin(x[2]) - cos(x[3]) - x[5]*x[5] - 1.0      // 0
		fx[1] = -x[0] + 2.0*x[1] + cos(x[2]) - sin(x[3]) + x[5] - 1.0          // 1
		fx[2] = x[0] + 3.0*x[1] + sin(x[3]) - cos(x[4]) - x[5]*x[5] - 1.0      // 2
		fx[3] = 2.0*x[0] + 4.0*x[1] + cos(x[3]) - cos(x[4]) + x[5] - 1.0       // 3
		fx[4] = x[0] + 5.0*x[1] - sin(x[2]) + sin(x[4]) - x[5]*x[5]*x[5] - 1.0 // 4
		fx[5] = x[0] + 6.0*x[1] - cos(x[2]) + cos(x[4]) + x[5] - 1.0           // 5
		return nil
	}

	// dfdx(x): the full analytical Jacobian is evaluated, but each processor
	// only puts its own block of rows into the triplet
	Jfcn := func(dfdx *la.Triplet, x []float64) error {
		dfdx.Start()
		full := [][]float64{
			{2.0, -1.0, cos(x[2]), sin(x[3]), 0.0, -2.0 * x[5]},
			{-1.0, 2.0, -sin(x[2]), -cos(x[3]), 0.0, 1.0},
			{1.0, 3.0, 0.0, cos(x[3]), sin(x[4]), -2.0 * x[5]},
			{2.0, 4.0, 0.0, -sin(x[3]), sin(x[4]), 1.0},
			{1.0, 5.0, -cos(x[2]), 0.0, cos(x[4]), -3.0 * x[5] * x[5]},
			{1.0, 6.0, sin(x[2]), 0.0, -sin(x[4]), 1.0},
		}
		const ndim = 6
		id, sz := mpi.Rank(), mpi.Size()
		firstRow, lastRowP1 := (id*ndim)/sz, ((id+1)*ndim)/sz
		for col := 0; col < ndim; col++ {
			for row := firstRow; row < lastRowP1; row++ {
				dfdx.Put(row, col, full[row][col])
			}
		}
		return nil
	}

	// station where the Jacobians are compared
	x := []float64{5.0, 5.0, pi, pi, pi, 5.0}
	var tst testing.T
	num.CompareJac(&tst, ffcn, Jfcn, x, 1e-6, true)
}
示例#7
0
// main solves a complex diagonal system with MUMPS, with the rows of the
// matrix and of the right-hand side distributed over the MPI processors.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	id, sz := mpi.Rank(), mpi.Size()
	if id == 0 {
		chk.PrintTitle("Test MUMPS Sol 05")
	}

	ndim := 10
	if sz > ndim {
		chk.Panic("the number of processors must be smaller than or equal to %d", ndim)
	}

	// rows [start, endp1) are assembled by this processor
	start, endp1 := (id*ndim)/sz, ((id+1)*ndim)/sz

	// exact solution: x[i] = (i+1) + (i+1)/10 i
	x_correct := make([]complex128, ndim)
	for i := range x_correct {
		k := float64(i + 1)
		x_correct[i] = complex(k, k/10.0)
	}

	// assemble a (diagonal only) and b
	b := make([]complex128, ndim)
	var t la.TripletC
	t.Init(ndim, ndim, ndim, true)
	for i := start; i < endp1; i++ {

		// diagonal entry a[i][i] = ar + ac i
		shift := float64(i) / (float64(ndim) / 10.0)
		ar := 10.0 + shift
		ac := 10.0 - shift
		t.Put(i, i, ar, ac)

		// RHS matching the exact solution: b[i] = a[i][i] * x_correct[i]
		xr, xc := real(x_correct[i]), imag(x_correct[i])
		b[i] = complex(ar*xr-ac*xc, ar*xc+ac*xr)
	}

	// each processor holds only a part of b, thus b is summed to root
	sum_b_to_root := true
	la.RunMumpsTestC(&t, 1e-14, b, x_correct, sum_b_to_root)
}
示例#8
0
// main runs the simulation in data/bh16.sim and compares the results with
// the reference values in cmp/bh16.cmp.
func main() {

	// catch errors: the root processor reports panics and test failures;
	// every processor stops MPI on the way out
	var tst testing.T
	defer func() {
		if mpi.Rank() == 0 {
			if err := recover(); err != nil {
				io.PfRed("ERROR: %v\n", err)
			}
			if tst.Failed() {
				io.PfRed("test failed\n")
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// start global variables and log
	analysis := fem.NewFEM("data/bh16.sim", "", true, true, false, true, true, 0)

	// run simulation
	err := analysis.Run()
	if err != nil {
		// tst is not driven by the testing framework, so what it records is
		// never printed; show the cause of the failure explicitly
		if mpi.Rank() == 0 {
			io.PfRed("Run failed:\n%v\n", err)
		}
		tst.Error("Run failed\n")
		return
	}

	// check
	skipK := true
	tolK := 1e-12
	tolu := 1e-15
	tols := 1e-12
	fem.TestingCompareResultsU(&tst, "data/bh16.sim", "cmp/bh16.cmp", "", tolK, tolu, tols, skipK, true)
}
示例#9
0
// main runs the simulation in data/p01.sim and reports whether it succeeded.
func main() {

	// catch errors: the root processor reports panics and test failures;
	// every processor stops MPI on the way out
	var tst testing.T
	defer func() {
		if mpi.Rank() == 0 {
			if err := recover(); err != nil {
				io.PfRed("ERROR: %v\n", err)
			}
			if tst.Failed() {
				io.PfRed("test failed\n")
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// start global variables and log
	analysis := fem.NewFEM("data/p01.sim", "", true, true, false, true, true, 0)

	// run simulation
	err := analysis.Run()
	if err != nil {
		// tst is not driven by the testing framework, so what it records is
		// never printed; show the cause of the failure explicitly
		if mpi.Rank() == 0 {
			io.PfRed("Run failed:\n%v\n", err)
		}
		tst.Error("Run failed\n")
		return
	}
}
示例#10
0
// main starts and runs the simulation in data/p01.sim, recording a failure
// if either step does not succeed.
func main() {

	// on exit: the root processor reports panics and failures; every
	// processor stops MPI
	var tst testing.T
	defer func() {
		if mpi.Rank() == 0 {
			if r := recover(); r != nil {
				io.PfRed("ERROR: %v\n", r)
			}
			if tst.Failed() {
				io.PfRed("test failed\n")
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// start global variables and log
	if ok := fem.Start("data/p01.sim", true, true); !ok {
		tst.Error("Start failed\n")
		return
	}

	// make sure to flush log (runs before the deferred function above)
	defer fem.End()

	// run simulation
	if ok := fem.Run(); !ok {
		tst.Error("Run failed\n")
		return
	}
}
示例#11
0
// init_mpi sets the MPI related flags of the solver. Nothing is changed when
// MPI is off; otherwise root tells whether this is processor # 0 and Distr is
// switched on when more than one processor is in use.
func (o *Solver) init_mpi() {
	if !mpi.IsOn() {
		return
	}
	o.root = mpi.Rank() == 0
	if mpi.Size() > 1 {
		o.Distr = true
	}
}
示例#12
0
// main compares an analytical Jacobian, split between 2 MPI processors,
// against the one obtained by num.CompareJacMpi for a system with 2 equations.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	if mpi.Rank() == 0 {
		chk.PrintTitle("TestJacobian 01b (MPI)")
	}
	if mpi.Size() != 2 {
		io.Pf("this tests needs MPI 2 processors\n")
		return
	}

	// f(x): two equations in two unknowns
	ffcn := func(fx, x []float64) error {
		fx[0] = math.Pow(x[0], 3.0) + x[1] - 1.0
		fx[1] = -x[0] + math.Pow(x[1], 3.0) + 1.0
		return nil
	}

	// dfdx(x): the matrix is split by rows between the two processors;
	// switching perColumn on would split it by columns instead
	perColumn := false
	Jfcn := func(dfdx *la.Triplet, x []float64) error {
		dfdx.Start()
		onRoot := mpi.Rank() == 0
		switch {
		case perColumn && onRoot: // column 0
			dfdx.Put(0, 0, 3.0*x[0]*x[0])
			dfdx.Put(1, 0, -1.0)
		case perColumn: // column 1
			dfdx.Put(0, 1, 1.0)
			dfdx.Put(1, 1, 3.0*x[1]*x[1])
		case onRoot: // row 0
			dfdx.Put(0, 0, 3.0*x[0]*x[0])
			dfdx.Put(0, 1, 1.0)
		default: // row 1
			dfdx.Put(1, 0, -1.0)
			dfdx.Put(1, 1, 3.0*x[1]*x[1])
		}
		return nil
	}

	// station where the Jacobians are compared
	x := []float64{0.5, 0.5}
	var tst testing.T
	num.CompareJacMpi(&tst, ffcn, Jfcn, x, 1e-8, true)
}
示例#13
0
// main solves a 5x5 real system with MUMPS using 1 or 2 MPI processors. With
// 2 processors the matrix entries are split between them, while every
// processor holds the whole right-hand side.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	myrank := mpi.Rank()
	if myrank == 0 {
		chk.PrintTitle("Test MUMPS Sol 01a")
	}

	// all entries of the matrix, in the order they are put into the triplet;
	// entry (0,0) appears twice, i.e. it is given by two contributions
	type entry struct {
		i, j int
		v    float64
	}
	all := []entry{
		{0, 0, 1.0}, {0, 0, 1.0}, {1, 0, 3.0}, {0, 1, 3.0}, {2, 1, -1.0}, {4, 1, 4.0},
		{1, 2, 4.0}, {2, 2, -3.0}, {3, 2, 1.0}, {4, 2, 2.0}, {2, 3, 2.0}, {1, 4, 6.0}, {4, 4, 1.0},
	}

	// entries assembled by this processor
	var mine []entry
	switch mpi.Size() {
	case 1:
		mine = all
	case 2:
		if myrank == 0 {
			mine = all[:6]
		} else {
			mine = all[6:]
		}
	default:
		chk.Panic("this test needs 1 or 2 procs")
	}

	var t la.Triplet
	t.Init(5, 5, len(mine))
	for _, e := range mine {
		t.Put(e.i, e.j, e.v)
	}

	b := []float64{8.0, 45.0, -3.0, 3.0, 19.0}
	x_correct := []float64{1, 2, 3, 4, 5}
	sum_b_to_root := false
	la.RunMumpsTestR(&t, 1e-14, b, x_correct, sum_b_to_root)
}
示例#14
0
// setslice copies into x a set of values selected by the rank of the calling
// processor. Only ranks 0, 1 and 2 have values; x is left untouched for any
// other rank.
func setslice(x []float64) {
	var vals []float64
	switch mpi.Rank() {
	case 0:
		vals = []float64{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}
	case 1:
		vals = []float64{10, 10, 10, 20, 20, 20, 30, 30, 30, 40, 40}
	case 2:
		vals = []float64{100, 100, 100, 1000, 1000, 1000, 2000, 2000, 2000, 3000, 3000}
	}
	copy(x, vals) // copies nothing when vals is nil
}
示例#15
0
// main solves a 10x10 system with MUMPS using the complex interface (all
// imaginary parts are zero), with the rows distributed over the MPI processors.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	id, sz := mpi.Rank(), mpi.Size()
	if id == 0 {
		chk.PrintTitle("Test MUMPS Sol 04")
	}

	ndim := 10
	if sz > ndim {
		chk.Panic("the number of processors must be smaller than or equal to %d", ndim)
	}

	// rows [start, endp1) are assembled by this processor
	start, endp1 := (id*ndim)/sz, ((id+1)*ndim)/sz

	b := make([]complex128, ndim)
	var t la.TripletC
	t.Init(ndim, ndim, ndim*ndim, true)

	for i := start; i < endp1; i++ {

		// row i has entries from column i-1 (column 0 for the first row)
		// up to the last column
		first := i
		if i > 0 {
			first = i - 1
		}
		for j := first; j < ndim; j++ {
			val := 10.0 - float64(j)
			if i > j {
				val -= 1.0
			}
			t.Put(i, j, val, 0)
		}
		b[i] = complex(float64(i+1), 0.0)
	}

	// each processor holds only a part of b, thus b is summed to root
	x_correct := []complex128{-1, 8, -65, 454, -2725, 13624, -54497, 163490, -326981, 326991}
	sum_b_to_root := true
	la.RunMumpsTestC(&t, 1e-4, b, x_correct, sum_b_to_root)
}
示例#16
0
// RunMumpsTestC solves the complex system A.x = b with the "mumps" solver and,
// on the root processor, prints the system and compares the solution with
// x_correct. Any solver error, or a difference larger than tol_cmp in the real
// or imaginary parts, results in a panic.
//  INPUT:
//    t             -- triplet with the entries of A
//    tol_cmp       -- tolerance for the comparison with x_correct
//    b             -- right-hand side
//    x_correct     -- expected solution
//    sum_b_to_root -- flag passed on to SolveC
func RunMumpsTestC(t *TripletC, tol_cmp float64, b, x_correct []complex128, sum_b_to_root bool) {

	// info
	const (
		symmetric = false
		verbose   = false
		timing    = false
	)

	// allocate solver
	lis := GetSolver("mumps")
	defer lis.Clean()

	// initialise solver
	if err := lis.InitC(t, symmetric, verbose, timing); err != nil {
		chk.Panic("%v", err.Error())
	}

	// factorise
	if err := lis.Fact(); err != nil {
		chk.Panic("%v", err.Error())
	}

	// solve: x := inv(A) * b, with real and imaginary parts in separate arrays
	bR, bC := ComplexToRC(b)
	n := len(b)
	xR, xC := make([]float64, n), make([]float64, n)
	if err := lis.SolveC(xR, xC, bR, bC, sum_b_to_root); err != nil {
		chk.Panic("%v", err.Error())
	}
	x := RCtoComplex(xR, xC)

	// output and check are done by the root processor only
	if mpi.Rank() != 0 {
		return
	}

	// output
	A := t.ToMatrix(nil)
	io.Pforan("A.x = b\n")
	PrintMatC("A", A.ToDense(), "(%g+", "%gi) ", false)
	PrintVecC("x", x, "(%g+", "%gi) ", false)
	PrintVecC("b", b, "(%g+", "%gi) ", false)

	// check real parts first, then imaginary parts
	xR_correct, xC_correct := ComplexToRC(x_correct)
	errR := VecMaxDiff(xR, xR_correct)
	if errR > tol_cmp {
		chk.Panic("test failed: errR = %g", errR)
	}
	errC := VecMaxDiff(xC, xC_correct)
	if errC > tol_cmp {
		chk.Panic("test failed: errC = %g", errC)
	}
	io.Pf("err(xR) = %g OK\n", errR)
	io.Pf("err(xC) = %g OK\n", errC)
}
示例#17
0
// main checks mpi.Abort: every processor prints a message once per second
// and, at a given iteration, one processor aborts the run.
func main() {

	mpi.Start(false)
	defer func() {
		mpi.Stop(false)
	}()

	if mpi.Rank() == 0 {
		io.PfYel("\nTest MPI 04\n")
	}

	// processor that aborts and iteration at which it does so
	const (
		abortRank = 2
		abortStep = 3
	)

	for i := 0; i < 60; i++ {
		time.Sleep(time.Second)
		io.Pf("hello from %v\n", mpi.Rank())
		if mpi.Rank() == abortRank && i == abortStep {
			// report the rank that really aborts (the iteration index used to be shown instead)
			io.PfGreen("rank = %d wants to abort (the following error is OK)\n", abortRank)
			mpi.Abort()
		}
	}
}
示例#18
0
// Start initialises the package-level Global data: the multiprocessing
// information, the simulation read from simfilepath and the auxiliary
// structures. It returns true on success and false if reading the simulation
// or initialising the dynamics coefficients fails.
func Start(simfilepath string, erasefiles, verbose bool) (startisok bool) {

	// multiprocessing data: single processor unless MPI is on
	Global.Rank, Global.Nproc = 0, 1
	if mpi.IsOn() {
		Global.Rank = mpi.Rank()
		Global.Nproc = mpi.Size()
	}
	Global.Root = Global.Rank == 0
	Global.Distr = Global.Nproc > 1

	// only the root processor may be verbose
	Global.Verbose = verbose && Global.Root

	// workspace arrays with one entry per processor
	Global.WspcStop = make([]int, Global.Nproc)
	Global.WspcInum = make([]int, Global.Nproc)

	// simulation
	dir, fn := filepath.Dir(simfilepath), filepath.Base(simfilepath)
	Global.Sim = inp.ReadSim(dir, fn, Global.LogPrefix, erasefiles)
	LogErrCond(Global.Sim == nil, "ReadSim failed\n")
	if Stop() {
		return false
	}

	// convenience variables
	sim := Global.Sim
	Global.Ndim = sim.Ndim
	Global.Dirout = sim.Data.DirOut
	Global.Fnkey = sim.Data.FnameKey
	Global.Enc = sim.Data.Encoder
	Global.Stat = sim.Data.Stat
	Global.LogBcs = sim.Data.LogBcs
	Global.Debug = sim.Data.Debug

	// fix show residual flag: only root may show residuals
	if !Global.Root {
		Global.Sim.Data.ShowR = false
	}

	// auxiliary structures
	Global.DynCoefs = new(DynCoefs)
	if ok := Global.DynCoefs.Init(&Global.Sim.Solver); !ok {
		return false
	}
	Global.HydroSt = new(HydroStatic)
	Global.HydroSt.Init()

	// success
	return true
}
示例#19
0
// Jacobian computes a forward-difference approximation of the Jacobian matrix
// dfdx of f at x. With N=n-1, the entries are:
//
//	df0dx0, df0dx1, df0dx2, ... df0dxN
//	df1dx0, df1dx1, df1dx2, ... df1dxN
//	     . . . . . . . . . . . . .
//	dfNdx0, dfNdx1, dfNdx2, ... dfNdxN
//
//  INPUT:
//    ffcn  -- f(x) function
//    x     -- station where dfdx has to be calculated; its components are
//             temporarily perturbed and restored before returning, including
//             when ffcn fails
//    fx    -- f @ x
//    w     -- workspace with size == n == len(x)
//    distr -- if true, each MPI processor puts only its own block of rows in J
//  OUTPUT:
//    J -- dfdx @ x; initialised here if J.Max() == 0
//  RETURNS:
//    the first error returned by ffcn, if any
func Jacobian(J *la.Triplet, ffcn Cb_f, x, fx, w []float64, distr bool) (err error) {

	// rows [start, endp1) are handled by this processor
	ndim := len(x)
	start, endp1 := 0, ndim
	if distr {
		id, sz := mpi.Rank(), mpi.Size()
		start, endp1 = (id*ndim)/sz, ((id+1)*ndim)/sz
	}

	// allocate space for the local rows only
	if J.Max() == 0 {
		J.Init(ndim, ndim, (endp1-start)*ndim)
	}
	J.Start()

	// NOTE: cannot split calculation by columns unless the f function is
	//       independently calculated by each MPI processor.
	//       Otherwise, the AllReduce in f calculation would
	//       join pieces of f from different processors calculated for
	//       different x values (δx[col] from different columns).
	//       Thus, all processors loop over all columns and the split is
	//       made by rows.
	for col := 0; col < ndim; col++ {
		xsafe := x[col]
		delta := math.Sqrt(EPS * max(CTE1, math.Abs(xsafe)))
		x[col] = xsafe + delta
		err = ffcn(w, x) // w := f(x+δx[col])
		x[col] = xsafe   // restore x right away so that it is intact even if ffcn failed
		if err != nil {
			return
		}
		for row := start; row < endp1; row++ {
			J.Put(row, col, (w[row]-fx[row])/delta)
		}
	}
	return
}
示例#20
0
// InitLogFile creates the log file "<dirout>/<fnamekey>_p<rank>.log", stores
// it in LogFile and sets it as the output of the standard logger. It returns
// the error from creating the file, if any.
func InitLogFile(dirout, fnamekey string) (err error) {

	// processor number used in the file name; stays 0 when MPI is off
	rank := 0
	if mpi.IsOn() {
		rank = mpi.Rank()
	}

	// create log file
	fname := io.Sf("%s/%s_p%d.log", dirout, fnamekey, rank)
	if LogFile, err = os.Create(fname); err != nil {
		return
	}

	// connect logger to output file
	log.SetOutput(LogFile)
	return
}
示例#21
0
// RunMumpsTestR solves the real system A.x = b with the "mumps" solver and,
// on the root processor, prints the system and compares the solution with
// x_correct. Any solver error, or a difference larger than tol_cmp, results
// in a panic.
//  INPUT:
//    t             -- triplet with the entries of A
//    tol_cmp       -- tolerance for the comparison with x_correct
//    b             -- right-hand side
//    x_correct     -- expected solution
//    sum_b_to_root -- flag passed on to SolveR
func RunMumpsTestR(t *Triplet, tol_cmp float64, b, x_correct []float64, sum_b_to_root bool) {

	// info
	const (
		symmetric = false
		verbose   = false
		timing    = false
	)

	// allocate solver
	lis := GetSolver("mumps")
	defer lis.Clean()

	// initialise solver
	if err := lis.InitR(t, symmetric, verbose, timing); err != nil {
		chk.Panic("%v", err.Error())
	}

	// factorise
	if err := lis.Fact(); err != nil {
		chk.Panic("%v", err.Error())
	}

	// solve: x := inv(A) * b
	x := make([]float64, len(b))
	if err := lis.SolveR(x, b, sum_b_to_root); err != nil {
		chk.Panic("%v", err.Error())
	}

	// output and check are done by the root processor only
	if mpi.Rank() != 0 {
		return
	}

	// output
	A := t.ToMatrix(nil)
	io.Pforan("A.x = b\n")
	PrintMat("A", A.ToDense(), "%5g", false)
	PrintVec("x", x, "%g ", false)
	PrintVec("b", b, "%g ", false)

	// check
	maxdiff := VecMaxDiff(x, x_correct)
	if maxdiff > tol_cmp {
		chk.Panic("test failed: err = %g", maxdiff)
	}
	io.Pf("err(x) = %g OK\n", maxdiff)
}
示例#22
0
// SpTriSumToRoot joins (MPI) parallel triplets into the triplet of the root
// (Rank == 0) processor.
//  NOTES:
//    1) every other processor sends J.max followed by its whole J.i, J.j and J.x arrays
//    2) in root, the received arrays are appended to those already in J, and
//       J.pos and J.max are set to the new total length
func SpTriSumToRoot(J *Triplet) {

	// sender side: ship size and arrays to root
	if mpi.Rank() != 0 {
		mpi.SingleIntSend(J.max, 0)
		mpi.IntSend(J.i, 0)
		mpi.IntSend(J.j, 0)
		mpi.DblSend(J.x, 0)
		return
	}

	// root side: receive from each processor, in the same order used for sending
	nproc := mpi.Size()
	for proc := 1; proc < nproc; proc++ {
		nnz := mpi.SingleIntRecv(proc)
		idx := make([]int, nnz) // reused for rows and columns; append copies the values
		vals := make([]float64, nnz)

		mpi.IntRecv(idx, proc)
		J.i = append(J.i, idx...)

		mpi.IntRecv(idx, proc)
		J.j = append(J.j, idx...)

		mpi.DblRecv(vals, proc)
		J.x = append(J.x, vals...)
	}

	// the joined triplet is full
	total := len(J.x)
	J.pos, J.max = total, total
}
示例#23
0
// main starts and runs the simulation in data/spo751.sim and compares the
// results with the reference values in cmp/spo751.cmp.
func main() {

	// on exit: the root processor reports panics and failures; every
	// processor stops MPI
	var tst testing.T
	defer func() {
		if mpi.Rank() == 0 {
			if r := recover(); r != nil {
				io.PfRed("ERROR: %v\n", r)
			}
			if tst.Failed() {
				io.PfRed("test failed\n")
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// start global variables and log
	if ok := fem.Start("data/spo751.sim", true, true); !ok {
		tst.Error("Start failed\n")
		return
	}

	// make sure to flush log (runs before the deferred function above)
	defer fem.End()

	// run simulation
	if ok := fem.Run(); !ok {
		tst.Error("Run failed\n")
		return
	}

	// check
	const (
		skipK = true
		tolK  = 1e-17
		tolu  = 1e-12
		tols  = 1e-14
	)
	fem.TestingCompareResultsU(&tst, "data/spo751.sim", "cmp/spo751.cmp", tolK, tolu, tols, skipK, true)
}
示例#24
0
// main solves Van der Pol's equation with 2 MPI processors: each processor
// computes one component of f and one row of the Jacobian. The root processor
// collects the output and plots the results on exit.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	if mpi.Rank() == 0 {
		chk.PrintTitle("Test ODE 02b")
		io.Pfcyan("Hairer-Wanner VII-p5 Eq.(1.5) Van der Pol's Equation (MPI)\n")
	}
	if mpi.Size() != 2 {
		chk.Panic(">> error: this test requires 2 MPI processors\n")
		return
	}

	// f(x,y): each processor fills one component; the pieces are then joined
	eps := 1.0e-6
	w := make([]float64, 2) // workspace
	fcn := func(f []float64, x float64, y []float64, args ...interface{}) error {
		f[0], f[1] = 0, 0
		if rank := mpi.Rank(); rank == 0 {
			f[0] = y[1]
		} else if rank == 1 {
			f[1] = ((1.0-y[0]*y[0])*y[1] - y[0]) / eps
		}
		// join all f
		mpi.AllReduceSum(f, w)
		return nil
	}

	// dfdy(x,y): split by rows between the processors; switching
	// jacPerColumn on would split it by columns instead
	jacPerColumn := false
	jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error {
		if dfdy.Max() == 0 {
			dfdy.Init(2, 2, 4)
		}
		dfdy.Start()
		rank := mpi.Rank()
		switch {
		case jacPerColumn && rank == 0: // column 0
			dfdy.Put(0, 0, 0.0)
			dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps)
		case jacPerColumn && rank == 1: // column 1
			dfdy.Put(0, 1, 1.0)
			dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps)
		case !jacPerColumn && rank == 0: // row 0
			dfdy.Put(0, 0, 0.0)
			dfdy.Put(0, 1, 1.0)
		case !jacPerColumn && rank == 1: // row 1
			dfdy.Put(1, 0, (-2.0*y[0]*y[1]-1.0)/eps)
			dfdy.Put(1, 1, (1.0-y[0]*y[0])/eps)
		}
		return nil
	}

	// data
	silent := false
	fixstp := false
	//method := "Dopri5"
	method := "Radau5"
	xa, xb := 0.0, 2.0
	ya := []float64{2.0, -0.6}
	ndim := len(ya)

	// output: the root processor writes one line per call into a buffer
	var b bytes.Buffer
	out := func(first bool, dx, x float64, y []float64, args ...interface{}) error {
		if mpi.Rank() != 0 {
			return nil
		}
		if first {
			fmt.Fprintf(&b, "%23s %23s %23s %23s\n", "dx", "x", "y0", "y1")
		}
		fmt.Fprintf(&b, "%23.15E %23.15E %23.15E %23.15E\n", dx, x, y[0], y[1])
		return nil
	}

	// plot on exit (root only); runs before MPI is stopped
	defer func() {
		if mpi.Rank() != 0 {
			return
		}
		extra := "d2 = Read('data/vdpol_radau5_for.dat')\n" +
			"subplot(3,1,1)\n" +
			"plot(d2['x'],d2['y0'],'k+',label='res',ms=10)\n" +
			"subplot(3,1,2)\n" +
			"plot(d2['x'],d2['y1'],'k+',label='res',ms=10)\n"
		ode.Plot("/tmp/gosl", "vdpolB", method, &b, []int{0, 1}, ndim, nil, xa, xb, true, false, extra)
	}()

	// one run: with numjac, no Jacobian function is given to the solver
	var o ode.ODE
	o.Distr = true
	//numjac := true
	numjac := false
	if numjac {
		o.Init(method, ndim, fcn, nil, nil, out, silent)
	} else {
		o.Init(method, ndim, fcn, jac, nil, out, silent)
	}

	// tolerances and initial step size
	rtol := 1e-4
	atol := rtol
	o.SetTol(atol, rtol)
	o.IniH = 1.0e-4

	// solve, starting from a copy of the initial values
	y := make([]float64, ndim)
	copy(y, ya)
	dx := xb - xa
	if fixstp {
		dx = 0.05
	}
	t0 := time.Now()
	o.Solve(y, xa, xb, dx, fixstp)
	if mpi.Rank() == 0 {
		io.Pfmag("elapsed time = %v\n", time.Since(t0))
	}
}
示例#25
0
文件: main.go 项目: PatrickSchm/gofem
// main runs a finite element simulation. The command line arguments are:
// the simulation file name (".sim" is added if there is no extension) and,
// optionally, the erasefiles and verbose flags.
func main() {

	// catch errors: a panic is reported by the root processor only, but
	// every processor stops MPI on the way out
	defer func() {
		if r := recover(); r != nil {
			if mpi.Rank() == 0 {
				chk.Verbose = true
				for depth := 8; depth > 3; depth-- {
					chk.CallerInfo(depth)
				}
				io.PfRed("ERROR: %v\n", r)
			}
		}
		mpi.Stop(false)
	}()
	mpi.Start(false)

	// message
	if mpi.Rank() == 0 {
		io.PfWhite("\nGofem v3 -- Go Finite Element Method\n\n")
		io.Pf("Copyright 2015 Dorival Pedroso and Raul Durand. All rights reserved.\n")
		io.Pf("Use of this source code is governed by a BSD-style\n")
		io.Pf("license that can be found in the LICENSE file.\n\n")
	}

	// simulation filenamepath: first argument, mandatory
	flag.Parse()
	args := flag.Args()
	var fnamepath string
	if len(args) == 0 {
		chk.Panic("Please, provide a filename. Ex.: cylinder.sim")
	} else {
		fnamepath = args[0]
	}

	// check extension
	if io.FnExt(fnamepath) == "" {
		fnamepath += ".sim"
	}

	// other options: second and third arguments, both optional
	erasefiles, verbose := true, true
	if len(args) > 1 {
		erasefiles = io.Atob(args[1])
	}
	if len(args) > 2 {
		verbose = io.Atob(args[2])
	}

	// profiling?
	defer utl.DoProf(false)()

	// start global variables and log
	if ok := fem.Start(fnamepath, erasefiles, verbose); !ok {
		chk.Panic("Start failed\n")
		return
	}

	// make sure to flush log
	defer fem.End()

	// run simulation
	if ok := fem.Run(); !ok {
		io.PfRed("ERROR: cannot run simulation\n")
	}
}
示例#26
0
// main solves the transistor amplifier problem with 3 MPI processors: the
// components of f and the entries of the Jacobian and of the "M" matrix are
// split between the processors. The root processor collects the output and
// writes it to a file on exit.
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	if mpi.Rank() == 0 {
		chk.PrintTitle("Test ODE 04b (MPI)")
		io.Pfcyan("Hairer-Wanner VII-p376 Transistor Amplifier (MPI)\n")
		io.Pfcyan("(from E Hairer's website, not the system in the book)\n")
	}
	if mpi.Size() != 3 {
		chk.Panic(">> error: this test requires 3 MPI processors\n")
		return
	}

	// RIGHT-HAND SIDE OF THE AMPLIFIER PROBLEM
	// each processor fills some components of f; the pieces are then joined
	w := make([]float64, 8) // workspace
	fcn := func(f []float64, x float64, y []float64, args ...interface{}) error {
		d := args[0].(*HWtransData)
		UET := d.UE * math.Sin(d.W*x)
		FAC1 := d.BETA * (math.Exp((y[3]-y[2])/d.UF) - 1.0)
		FAC2 := d.BETA * (math.Exp((y[6]-y[5])/d.UF) - 1.0)
		la.VecFill(f, 0)
		switch mpi.Rank() {
		case 0:
			f[0] = y[0] / d.R9
		case 1:
			f[1] = (y[1]-d.UB)/d.R8 + d.ALPHA*FAC1
			f[2] = y[2]/d.R7 - FAC1
		case 2:
			f[3] = y[3]/d.R5 + (y[3]-d.UB)/d.R6 + (1.0-d.ALPHA)*FAC1
			f[4] = (y[4]-d.UB)/d.R4 + d.ALPHA*FAC2
			f[5] = y[5]/d.R3 - FAC2
			f[6] = y[6]/d.R1 + (y[6]-d.UB)/d.R2 + (1.0-d.ALPHA)*FAC2
			f[7] = (y[7] - UET) / d.R0
		}
		mpi.AllReduceSum(f, w)
		return nil
	}

	// JACOBIAN OF THE AMPLIFIER PROBLEM
	// row indices are written as (band position + column - NU)
	jac := func(dfdy *la.Triplet, x float64, y []float64, args ...interface{}) error {
		d := args[0].(*HWtransData)
		FAC14 := d.BETA * math.Exp((y[3]-y[2])/d.UF) / d.UF
		FAC27 := d.BETA * math.Exp((y[6]-y[5])/d.UF) / d.UF
		if dfdy.Max() == 0 {
			dfdy.Init(8, 8, 16)
		}
		NU := 2
		dfdy.Start()
		switch mpi.Rank() {
		case 0:
			dfdy.Put(2+0-NU, 0, 1.0/d.R9)
			dfdy.Put(2+1-NU, 1, 1.0/d.R8)
			dfdy.Put(1+2-NU, 2, -d.ALPHA*FAC14)
			dfdy.Put(0+3-NU, 3, d.ALPHA*FAC14)
			dfdy.Put(2+2-NU, 2, 1.0/d.R7+FAC14)
		case 1:
			dfdy.Put(1+3-NU, 3, -FAC14)
			dfdy.Put(2+3-NU, 3, 1.0/d.R5+1.0/d.R6+(1.0-d.ALPHA)*FAC14)
			dfdy.Put(3+2-NU, 2, -(1.0-d.ALPHA)*FAC14)
			dfdy.Put(2+4-NU, 4, 1.0/d.R4)
			dfdy.Put(1+5-NU, 5, -d.ALPHA*FAC27)
		case 2:
			dfdy.Put(0+6-NU, 6, d.ALPHA*FAC27)
			dfdy.Put(2+5-NU, 5, 1.0/d.R3+FAC27)
			dfdy.Put(1+6-NU, 6, -FAC27)
			dfdy.Put(2+6-NU, 6, 1.0/d.R1+1.0/d.R2+(1.0-d.ALPHA)*FAC27)
			dfdy.Put(3+5-NU, 5, -(1.0-d.ALPHA)*FAC27)
			dfdy.Put(2+7-NU, 7, 1.0/d.R0)
		}
		return nil
	}

	// MATRIX "M"
	// row indices are written as (band position + column - NU)
	c1, c2, c3, c4, c5 := 1.0e-6, 2.0e-6, 3.0e-6, 4.0e-6, 5.0e-6
	var M la.Triplet
	M.Init(8, 8, 14)
	M.Start()
	NU := 1
	switch mpi.Rank() {
	case 0:
		M.Put(1+0-NU, 0, -c5)
		M.Put(0+1-NU, 1, c5)
		M.Put(2+0-NU, 0, c5)
		M.Put(1+1-NU, 1, -c5)
		M.Put(1+2-NU, 2, -c4)
		M.Put(1+3-NU, 3, -c3)
	case 1:
		M.Put(0+4-NU, 4, c3)
		M.Put(2+3-NU, 3, c3)
		M.Put(1+4-NU, 4, -c3)
	case 2:
		M.Put(1+5-NU, 5, -c2)
		M.Put(1+6-NU, 6, -c1)
		M.Put(0+7-NU, 7, c1)
		M.Put(2+6-NU, 6, c1)
		M.Put(1+7-NU, 7, -c1)
	}

	// WRITE FILE FUNCTION
	// the root processor writes one numbered line per call into a buffer
	idxstp := 1
	var b bytes.Buffer
	out := func(first bool, dx, x float64, y []float64, args ...interface{}) error {
		if mpi.Rank() != 0 {
			return nil
		}
		if first {
			fmt.Fprintf(&b, "%6s%23s%23s%23s%23s%23s%23s%23s%23s%23s\n", "ns", "x", "y0", "y1", "y2", "y3", "y4", "y5", "y6", "y7")
		}
		fmt.Fprintf(&b, "%6d%23.15E", idxstp, x)
		for _, yj := range y {
			fmt.Fprintf(&b, "%23.15E", yj)
		}
		fmt.Fprintf(&b, "\n")
		idxstp++
		return nil
	}

	// write the buffer to file on exit (root only); runs before MPI is stopped
	defer func() {
		if mpi.Rank() == 0 {
			io.WriteFileD("/tmp/gosl", "hwamplifierB.res", &b)
		}
	}()

	// INITIAL DATA
	D, xa, xb, ya := HWtransIni()

	// SET ODE SOLVER
	// with numjac, no Jacobian function is given to the solver
	silent := false
	fixstp := false
	//method := "Dopri5"
	method := "Radau5"
	ndim := len(ya)
	//numjac := true
	numjac := false
	var osol ode.ODE

	osol.Pll = true

	if numjac {
		osol.Init(method, ndim, fcn, nil, &M, out, silent)
	} else {
		osol.Init(method, ndim, fcn, jac, &M, out, silent)
	}
	osol.IniH = 1.0e-6 // initial step size

	// SET TOLERANCES
	atol, rtol := 1e-11, 1e-5
	osol.SetTol(atol, rtol)

	// RUN
	dx := xb - xa
	if fixstp {
		dx = 0.01
	}
	t0 := time.Now()
	osol.Solve(ya, xa, xb, dx, fixstp, &D)
	if mpi.Rank() == 0 {
		io.Pfmag("elapsed time = %v\n", time.Since(t0))
	}
}
示例#27
0
文件: ode.go 项目: PaddySchmidt/gosl
// Init initialises the ODE structure with default values and allocates slices
//  INPUT:
//    method -- "FwEuler", "BwEuler", "MoEuler", "Dopri5" or "Radau5"; any
//              other name results in a panic
//    ndim   -- size of the arrays allocated per stage
//    fcn    -- stored as the f function
//    jac    -- stored as the Jacobian function; may be nil
//    M      -- "M" matrix; may be nil, in which case a diagonal matrix of
//              ones is created for the "BwEuler" method only
//    out    -- stored as the output function
//    silent -- stored as the silent flag
func (o *ODE) Init(method string, ndim int, fcn Cb_fcn, jac Cb_jac, M *la.Triplet, out Cb_out, silent bool) {

	// primary variables: input
	o.method, o.ndim = method, ndim
	o.fcn, o.jac, o.out = fcn, jac, out
	o.silent = silent

	// primary variables: default values
	o.ZeroTrial = false
	o.Atol = 1.0e-4
	o.Rtol = 1.0e-4
	o.IniH = 1.0e-4
	o.NmaxIt = 7
	o.NmaxSS = 1000
	o.Mmin = 0.125
	o.Mmax = 5.0
	o.Mfac = 0.9
	o.PredCtrl = true
	o.ϵ = 1.0e-16
	o.θmax = 1.0e-3
	o.C1h = 1.0
	o.C2h = 1.2
	o.LerrStrat = 3
	o.Pll = true
	o.UseRmsNorm = true
	o.SetTol(o.Atol, o.Rtol)

	// derived variables: Distr is only ever switched on here, never off
	o.root = true
	if mpi.IsOn() {
		o.root = mpi.Rank() == 0
		if mpi.Size() > 1 {
			o.Distr = true
		}
	}

	// M matrix: BwEuler gets a diagonal matrix of ones if none is given
	if M == nil && o.method == "BwEuler" {
		M = new(la.Triplet)
		la.SpTriSetDiag(M, o.ndim, 1)
	}
	if M != nil {
		o.mTri = M
		o.mMat = o.mTri.ToMatrix(nil)
		o.hasM = true
	}

	// method: step and accept functions and number of stages
	switch method {
	case "FwEuler":
		o.step, o.accept, o.nstg = fweuler_step, fweuler_accept, 1
	case "BwEuler":
		o.step, o.accept, o.nstg = bweuler_step, bweuler_accept, 1
	case "MoEuler":
		o.step, o.accept, o.nstg = erk_step, erk_accept, 2
		o.erkdat = ERKdat{true, ME2_a, ME2_b, ME2_be, ME2_c}
	case "Dopri5":
		o.step, o.accept, o.nstg = erk_step, erk_accept, 7
		o.erkdat = ERKdat{true, DP5_a, DP5_b, DP5_be, DP5_c}
	case "Radau5":
		o.step, o.accept, o.nstg = radau5_step, radau5_accept, 3
	default:
		chk.Panic(_ode_err1, method)
	}

	// stages allocates one array of size ndim per stage
	stages := func() [][]float64 {
		a := make([][]float64, o.nstg)
		for i := range a {
			a[i] = make([]float64, o.ndim)
		}
		return a
	}

	// allocate step variables
	o.f0 = make([]float64, o.ndim)
	o.scal = make([]float64, o.ndim)

	// allocate rk variables
	o.u = make([]float64, o.nstg)
	o.v = stages()
	o.w = stages()
	o.δw = stages()
	o.f = stages()

	// allocate variables used by Radau5 only
	if method == "Radau5" {
		o.z = stages()
		o.ycol = stages()
		o.ez = make([]float64, o.ndim)
		o.lerr = make([]float64, o.ndim)
		o.rhs = make([]float64, o.ndim)
	}
}
示例#28
0
// main solves a 5x5 sparse linear system with MUMPS, on 1 or 2 processors, and
// hands the expected solution {1, 2, 3, 4, 5} to la.RunMumpsTestR for checking.
//  NOTES:
//    1) some (i,j) positions receive more than one Put, e.g. (0,0) and (4,1)
//    2) with 2 processors, each rank holds only part of the triplet and of the
//       right-hand side; sum_b_to_root=true is passed along with them
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	rank := mpi.Rank()
	if rank == 0 {
		chk.PrintTitle("Test MUMPS Sol 01b")
	}

	// one (row, column, value) item of the sparse matrix
	type entry struct {
		i, j int
		v    float64
	}

	// assemble initialises the triplet with room for exactly the given items
	// and puts them in the given order
	var t la.Triplet
	assemble := func(entries []entry) {
		t.Init(5, 5, len(entries))
		for _, e := range entries {
			t.Put(e.i, e.j, e.v)
		}
	}

	rhs := []float64{8.0, 45.0, -3.0, 3.0, 19.0}
	switch mpi.Size() {
	case 1:
		// the whole matrix in a single processor
		assemble([]entry{
			{0, 0, 1.0},
			{0, 0, 1.0},
			{1, 0, 3.0},
			{0, 1, 3.0},
			{2, 1, -1.0},
			{4, 1, 4.0},
			{1, 2, 4.0},
			{2, 2, -3.0},
			{3, 2, 1.0},
			{4, 2, 2.0},
			{2, 3, 2.0},
			{1, 4, 6.0},
			{4, 4, 1.0},
		})
	case 2:
		// each processor starts from a zeroed right-hand side and sets its own share
		la.VecFill(rhs, 0)
		if rank == 0 {
			assemble([]entry{
				{0, 0, 1.0},
				{0, 0, 1.0},
				{1, 0, 3.0},
				{0, 1, 3.0},
				{2, 1, -1.0},
				{4, 1, 1.0},
				{4, 1, 1.5},
				{4, 1, 1.5},
			})
			rhs[0], rhs[1], rhs[2] = 8.0, 40.0, 1.5
		} else {
			assemble([]entry{
				{1, 2, 4.0},
				{2, 2, -3.0},
				{3, 2, 1.0},
				{4, 2, 2.0},
				{2, 3, 2.0},
				{1, 4, 6.0},
				{4, 4, 0.5},
				{4, 4, 0.5},
			})
			rhs[1], rhs[2], rhs[3], rhs[4] = 5.0, -4.5, 3.0, 19.0
		}
	default:
		chk.Panic("this test needs 1 or 2 procs")
	}

	// run test
	la.RunMumpsTestR(&t, 1e-14, rhs, []float64{1, 2, 3, 4, 5}, true)
}
示例#29
0
// main exercises the float64 collective operations of the mpi package
// (SumToRoot, BcastFromRoot, AllReduceSum, AllReduceSumAdd, AllReduceMin and
// AllReduceMax) and compares each result against hard-coded values.
//  NOTES:
//    1) this test must be run with exactly 3 processors
func main() {

	mpi.Start(false)
	defer mpi.Stop(false)

	if mpi.Rank() == 0 {
		io.PfYel("\nTest MPI 01\n")
	}
	if mpi.Size() != 3 {
		chk.Panic("this test needs 3 processors")
	}

	// each processor sets only its own contiguous range of x; the rest stays zero
	const npts = 11
	x := make([]float64, npts)
	rank, nproc := mpi.Rank(), mpi.Size()
	first, last := (rank*npts)/nproc, ((rank+1)*npts)/nproc
	for k := first; k < last; k++ {
		x[k] = float64(k)
	}

	// label builds the message of a check by inserting the processor number
	label := func(format string) string {
		return fmt.Sprintf(format, rank)
	}

	// Barrier
	mpi.Barrier()

	io.Pfgrey("x @ proc # %d = %v\n", rank, x)

	// SumToRoot: processor 0 is checked against the sum, the others against zeros
	var tst testing.T
	r := make([]float64, npts)
	mpi.SumToRoot(r, x)
	expected := make([]float64, npts)
	if rank == 0 {
		expected = []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
	}
	chk.Vector(&tst, label("SumToRoot:       r @ proc # %d"), 1e-17, r, expected)

	// BcastFromRoot
	r[0] = 666
	mpi.BcastFromRoot(r)
	chk.Vector(&tst, label("BcastFromRoot:   r @ proc # %d"), 1e-17, r, []float64{666, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})

	// AllReduceSum
	setslice(x)
	w := make([]float64, npts)
	mpi.AllReduceSum(x, w)
	chk.Vector(&tst, label("AllReduceSum:    w @ proc # %d"), 1e-17, w, []float64{110, 110, 110, 1021, 1021, 1021, 2032, 2032, 2032, 3043, 3043})

	// AllReduceSumAdd
	setslice(x)
	y := make([]float64, npts)
	for k := range y {
		y[k] = -1000
	}
	mpi.AllReduceSumAdd(y, x, w)
	chk.Vector(&tst, label("AllReduceSumAdd: y @ proc # %d"), 1e-17, y, []float64{-890, -890, -890, 21, 21, 21, 1032, 1032, 1032, 2043, 2043})

	// AllReduceMin
	setslice(x)
	mpi.AllReduceMin(x, w)
	chk.Vector(&tst, label("AllReduceMin:    x @ proc # %d"), 1e-17, x, []float64{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3})

	// AllReduceMax
	setslice(x)
	mpi.AllReduceMax(x, w)
	chk.Vector(&tst, label("AllReduceMax:    x @ proc # %d"), 1e-17, x, []float64{100, 100, 100, 1000, 1000, 1000, 2000, 2000, 2000, 3000, 3000})
}
示例#30
0
// radau5_step_mpi is the Radau5 step function: it performs one trial step of size o.h
// starting at (x0, y0), solving the three-stage system by simplified Newton iterations
// and computing an error estimate for the step.
//  Input:
//    o    -- solver holding the step size, workspace arrays, linear solvers and counters
//    y0   -- solution at x0
//    x0   -- current value of the independent variable
//    args -- extra arguments forwarded to o.fcn and o.jac
//  Output:
//    rerr -- error estimate of the step; set to 2.0 if the Newton iterations are diverging,
//            in which case o.diverg is true and o.dvfac holds a step reduction factor
//    err  -- error from o.fcn, from the Jacobian evaluation or from the linear solvers;
//            rerr is meaningless if err != nil
//  NOTES:
//    1) the Jacobian and the factorisations are recomputed unless o.reuseJdec, o.reuseJ
//       or o.jacIsOK say otherwise
//    2) o.v, o.w and o.δw are also used as workspace at several places
//    3) the function panics only if the Newton loop runs out of iterations without
//       having converged or having been flagged as diverging
func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) {

	// factors
	α := r5.α_ / o.h
	β := r5.β_ / o.h
	γ := r5.γ_ / o.h

	// Jacobian and decomposition
	if o.reuseJdec {
		o.reuseJdec = false
	} else {

		// calculate only first Jacobian for all iterations (simple/modified Newton's method)
		if o.reuseJ {
			o.reuseJ = false
		} else if !o.jacIsOK {

			// Jacobian triplet
			if o.jac == nil { // numerical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . numerical Jacobian . . . < < < < < < < < <\n") }
				err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) {
					e = o.fcn(fy, o.h, x0, y, args...)
					return
				}, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable
			} else { // analytical
				//if x0 == 0.0 { io.Pfgrey(" > > > > > > > > . . . analytical Jacobian . . . < < < < < < < < <\n") }
				err = o.jac(&o.dfdyT, o.h, x0, y0, args...)
			}
			if err != nil {
				return
			}

			// create M matrix (identity); if distributed, each processor puts only its own rows
			if o.doinit && !o.hasM {
				o.mTri = new(la.Triplet)
				if o.Distr {
					id, sz := mpi.Rank(), mpi.Size()
					start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz
					o.mTri.Init(o.ndim, o.ndim, endp1-start)
					for i := start; i < endp1; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				} else {
					o.mTri.Init(o.ndim, o.ndim, o.ndim)
					for i := 0; i < o.ndim; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				}
			}
			o.njeval += 1
			o.jacIsOK = true
		}

		// initialise triplets
		if o.doinit {
			o.rctriR = new(la.Triplet)
			o.rctriC = new(la.TripletC)
			o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len())
			xzmono := o.Distr
			o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono)
		}

		// update triplets
		la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT)       // rctriR :=      γ*M - dfdy
		la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy

		// initialise solver
		if o.doinit {
			err = o.lsolR.InitR(o.rctriR, false, false, false)
			if err != nil {
				return
			}
			err = o.lsolC.InitC(o.rctriC, false, false, false)
			if err != nil {
				return
			}
		}

		// perform factorisation
		// NOTE(review): any status reported by Fact is not inspected here -- confirm
		// whether Fact returns an error that should be propagated
		o.lsolR.Fact()
		o.lsolC.Fact()
		o.ndecomp += 1
	}

	// updated u[i]
	o.u[0] = x0 + r5.c[0]*o.h
	o.u[1] = x0 + r5.c[1]*o.h
	o.u[2] = x0 + r5.c[2]*o.h

	// (trial/initial) updated z[i] and w[i]
	if o.first || o.ZeroTrial {
		for m := 0; m < o.ndim; m++ {
			o.z[0][m], o.w[0][m] = 0.0, 0.0
			o.z[1][m], o.w[1][m] = 0.0, 0.0
			o.z[2][m], o.w[2][m] = 0.0, 0.0
		}
	} else {
		// starting values computed from the ycol values and the step size ratio h/hprev
		c3q := o.h / o.hprev
		c1q := r5.μ1 * c3q
		c2q := r5.μ2 * c3q
		for m := 0; m < o.ndim; m++ {
			o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m]))
			o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m]))
			o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m]))
			o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m]
			o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m]
			o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m]
		}
	}

	// iterations
	o.nit = 0
	o.η = math.Pow(max(o.η, o.ϵ), 0.8)
	o.θ = o.θmax
	o.diverg = false
	var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64
	var it int
	for it = 0; it < o.NmaxIt; it++ {

		// max iterations ?
		o.nit = it + 1
		if o.nit > o.nitmax {
			o.nitmax = o.nit
		}

		// evaluate f(x,y) at (u[i],v[i]=y0+z[i])
		for i := 0; i < 3; i++ {
			for m := 0; m < o.ndim; m++ {
				o.v[i][m] = y0[m] + o.z[i][m]
			}
			o.nfeval += 1
			err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...)
			if err != nil {
				return
			}
		}

		// calc rhs
		if o.hasM {
			// using δw as workspace here
			la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0
			la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1
			la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2
			if o.Distr {
				mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here
			}
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m]
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m]
			}
		}

		// solve linear system; the real and complex systems are solved concurrently
		// only when not distributed and o.Pll is set
		o.nlinsol += 1
		var errR, errC error
		if !o.Distr && o.Pll {
			wg := new(sync.WaitGroup)
			wg.Add(2)
			go func() {
				errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
				wg.Done()
			}()
			go func() {
				errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
				wg.Done()
			}()
			wg.Wait()
		} else {
			errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
			errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
		}

		// check for errors from linear solution
		if errR != nil || errC != nil {
			var errmsg string
			if errR != nil {
				errmsg += errR.Error()
			}
			if errC != nil {
				if errR != nil {
					errmsg += "\n"
				}
				errmsg += errC.Error()
			}
			err = errors.New(errmsg)
			return
		}

		// update w and z
		for m := 0; m < o.ndim; m++ {
			o.w[0][m] += o.δw[0][m]
			o.w[1][m] += o.δw[1][m]
			o.w[2][m] += o.δw[2][m]
			o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m]
			o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m]
			o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m]
		}

		// rms norm of δw
		Lδw = 0.0
		for m := 0; m < o.ndim; m++ {
			Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0)
		}
		Lδw = math.Sqrt(Lδw / float64(3*o.ndim))

		// check convergence
		if it > 0 {
			thq = Lδw / oLδw
			if it == 1 {
				o.θ = thq
			} else {
				o.θ = math.Sqrt(thq * othq)
			}
			othq = thq
			if o.θ < 0.99 {
				o.η = o.θ / (1.0 - o.θ)
				iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ)
				itRerr = iterr / o.fnewt
				if itRerr >= 1.0 { // diverging
					qnewt = max(1.0e-4, min(20.0, itRerr))
					o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit)))
					o.diverg = true
					break
				}
			} else { // diverging badly (unexpected step-rejection)
				o.dvfac = 0.5
				o.diverg = true
				break
			}
		}

		// save old norm
		oLδw = Lδw

		// converged
		if o.η*Lδw < o.fnewt {
			break
		}
	}

	// did not converge: the loop ended without a 'break', thus it == NmaxIt.
	// After a 'break' (converged or diverging) it is at most NmaxIt-1, including the
	// case where the last allowed iteration was the one that converged.
	if it == o.NmaxIt {
		chk.Panic("radau5_step failed with it=%d", it)
	}

	// diverging => stop
	if o.diverg {
		rerr = 2.0 // must leave state intact, any rerr is OK
		return
	}

	// error estimate
	if o.LerrStrat == 1 {

		// simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems)
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m]
			rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0)
		}
		rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10)

	} else {

		// common
		if o.hasM {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m]
			}
			if o.Distr {
				la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
				mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
			} else {
				la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
				o.rhs[m] = o.f0[m] + γ*o.ez[m]
			}
		}

		// HW-VII p123 Eq.(8.19) (first solve; shared by strategies 2 and 3)
		err = o.lsolR.SolveR(o.lerr, o.rhs, false)
		if err != nil {
			return
		}
		rerr = o.rms_norm(o.lerr)

		// HW-VII p123 Eq.(8.20): second solve, only if the estimate is not smaller
		// than 1 and this is the first step or the previous one was rejected.
		// note: !(rerr < 1.0) is also true if rerr is NaN
		if o.LerrStrat != 2 && !(rerr < 1.0) && (o.first || o.reject) {
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = y0[m] + o.lerr[m] // y0perr
			}
			o.nfeval += 1
			err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr
			if err != nil {
				return
			}
			if o.hasM {
				la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr
				if o.Distr {
					la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
					mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
				} else {
					la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
				}
			} else {
				la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez
			}
			err = o.lsolR.SolveR(o.lerr, o.rhs, false)
			if err != nil {
				return
			}
			rerr = o.rms_norm(o.lerr)
		}
	}
	return
}