Exemplo n.º 1
0
// radau5_step_mpi performs one step of the Radau5 method starting from (x0, y0) with the
// current stepsize o.h, using simplified Newton iterations on the transformed stage
// variables w (z = T*w).
//
//  Input:
//   o    -- solver holding the stepsize, workspace vectors, linear solvers and counters
//   y0   -- solution values at x0
//   x0   -- current value of the independent variable
//   args -- extra arguments forwarded untouched to o.fcn and o.jac
//
//  Output:
//   rerr -- error estimate of this step (rms norm of o.lerr); set to 2.0 when the
//           Newton iterations diverge, in which case o.diverg is true and o.dvfac is set
//   err  -- first error reported by the Jacobian, by o.fcn or by the linear solvers
//
// The function panics (chk.Panic) if the Newton loop runs out of iterations without
// either converging or being flagged as diverging.
func radau5_step_mpi(o *Solver, y0 []float64, x0 float64, args ...interface{}) (rerr float64, err error) {

	// factors: method constants scaled by the stepsize
	α := r5.α_ / o.h
	β := r5.β_ / o.h
	γ := r5.γ_ / o.h

	// Jacobian and decomposition
	if o.reuseJdec {
		// keep the existing factorisation for this step only
		o.reuseJdec = false
	} else {

		// calculate only first Jacobian for all iterations (simple/modified Newton's method)
		if o.reuseJ {
			// keep the existing Jacobian for this step only; still refactorise below
			o.reuseJ = false
		} else if !o.jacIsOK {

			// Jacobian triplet
			if o.jac == nil { // numerical
				err = num.JacobianMpi(&o.dfdyT, func(fy, y []float64) (e error) {
					e = o.fcn(fy, o.h, x0, y, args...)
					return
				}, y0, o.f0, o.w[0], o.Distr) // w works here as workspace variable
			} else { // analytical
				err = o.jac(&o.dfdyT, o.h, x0, y0, args...)
			}
			if err != nil {
				return
			}

			// create M matrix: identity when the user supplied none
			if o.doinit && !o.hasM {
				o.mTri = new(la.Triplet)
				if o.Distr {
					// each rank stores only its own contiguous slice of the diagonal
					id, sz := mpi.Rank(), mpi.Size()
					start, endp1 := (id*o.ndim)/sz, ((id+1)*o.ndim)/sz
					o.mTri.Init(o.ndim, o.ndim, endp1-start)
					for i := start; i < endp1; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				} else {
					o.mTri.Init(o.ndim, o.ndim, o.ndim)
					for i := 0; i < o.ndim; i++ {
						o.mTri.Put(i, i, 1.0)
					}
				}
			}
			o.njeval += 1
			o.jacIsOK = true
		}

		// initialise triplets
		if o.doinit {
			o.rctriR = new(la.Triplet)
			o.rctriC = new(la.TripletC)
			o.rctriR.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len())
			xzmono := o.Distr
			o.rctriC.Init(o.ndim, o.ndim, o.mTri.Len()+o.dfdyT.Len(), xzmono)
		}

		// update triplets
		la.SpTriAdd(o.rctriR, γ, o.mTri, -1, &o.dfdyT)       // rctriR :=      γ*M - dfdy
		la.SpTriAddR2C(o.rctriC, α, β, o.mTri, -1, &o.dfdyT) // rctriC := (α+βi)*M - dfdy

		// initialise solver
		if o.doinit {
			err = o.lsolR.InitR(o.rctriR, false, false, false)
			if err != nil {
				return
			}
			err = o.lsolC.InitC(o.rctriC, false, false, false)
			if err != nil {
				return
			}
		}

		// perform factorisation; a failed factorisation must not be used by the solves below
		err = o.lsolR.Fact()
		if err != nil {
			return
		}
		err = o.lsolC.Fact()
		if err != nil {
			return
		}
		o.ndecomp += 1
	}

	// updated u[i]: x-coordinates of the three stages
	o.u[0] = x0 + r5.c[0]*o.h
	o.u[1] = x0 + r5.c[1]*o.h
	o.u[2] = x0 + r5.c[2]*o.h

	// (trial/initial) updated z[i] and w[i]
	if o.first || o.ZeroTrial {
		for m := 0; m < o.ndim; m++ {
			o.z[0][m], o.w[0][m] = 0.0, 0.0
			o.z[1][m], o.w[1][m] = 0.0, 0.0
			o.z[2][m], o.w[2][m] = 0.0, 0.0
		}
	} else {
		// starting values computed from o.ycol, scaled by the ratio of new to previous stepsize
		c3q := o.h / o.hprev
		c1q := r5.μ1 * c3q
		c2q := r5.μ2 * c3q
		for m := 0; m < o.ndim; m++ {
			o.z[0][m] = c1q * (o.ycol[0][m] + (c1q-r5.μ4)*(o.ycol[1][m]+(c1q-r5.μ3)*o.ycol[2][m]))
			o.z[1][m] = c2q * (o.ycol[0][m] + (c2q-r5.μ4)*(o.ycol[1][m]+(c2q-r5.μ3)*o.ycol[2][m]))
			o.z[2][m] = c3q * (o.ycol[0][m] + (c3q-r5.μ4)*(o.ycol[1][m]+(c3q-r5.μ3)*o.ycol[2][m]))
			o.w[0][m] = r5.Ti[0][0]*o.z[0][m] + r5.Ti[0][1]*o.z[1][m] + r5.Ti[0][2]*o.z[2][m]
			o.w[1][m] = r5.Ti[1][0]*o.z[0][m] + r5.Ti[1][1]*o.z[1][m] + r5.Ti[1][2]*o.z[2][m]
			o.w[2][m] = r5.Ti[2][0]*o.z[0][m] + r5.Ti[2][1]*o.z[1][m] + r5.Ti[2][2]*o.z[2][m]
		}
	}

	// iterations
	o.nit = 0
	o.η = math.Pow(max(o.η, o.ϵ), 0.8)
	o.θ = o.θmax
	o.diverg = false
	var Lδw, oLδw, thq, othq, iterr, itRerr, qnewt float64
	var it int
	for it = 0; it < o.NmaxIt; it++ {

		// max iterations ?
		o.nit = it + 1
		if o.nit > o.nitmax {
			o.nitmax = o.nit
		}

		// evaluate f(x,y) at (u[i],v[i]=y0+z[i])
		for i := 0; i < 3; i++ {
			for m := 0; m < o.ndim; m++ {
				o.v[i][m] = y0[m] + o.z[i][m]
			}
			o.nfeval += 1
			err = o.fcn(o.f[i], o.h, o.u[i], o.v[i], args...)
			if err != nil {
				return
			}
		}

		// calc rhs
		if o.hasM {
			// using δw as workspace here
			la.SpMatVecMul(o.δw[0], 1, o.mMat, o.w[0]) // δw0 := M * w0
			la.SpMatVecMul(o.δw[1], 1, o.mMat, o.w[1]) // δw1 := M * w1
			la.SpMatVecMul(o.δw[2], 1, o.mMat, o.w[2]) // δw2 := M * w2
			if o.Distr {
				mpi.AllReduceSum(o.δw[0], o.v[0]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[1], o.v[1]) // v is used as workspace here
				mpi.AllReduceSum(o.δw[2], o.v[2]) // v is used as workspace here
			}
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.δw[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.δw[1][m] + β*o.δw[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.δw[1][m] - α*o.δw[2][m]
			}
		} else {
			for m := 0; m < o.ndim; m++ {
				o.v[0][m] = r5.Ti[0][0]*o.f[0][m] + r5.Ti[0][1]*o.f[1][m] + r5.Ti[0][2]*o.f[2][m] - γ*o.w[0][m]
				o.v[1][m] = r5.Ti[1][0]*o.f[0][m] + r5.Ti[1][1]*o.f[1][m] + r5.Ti[1][2]*o.f[2][m] - α*o.w[1][m] + β*o.w[2][m]
				o.v[2][m] = r5.Ti[2][0]*o.f[0][m] + r5.Ti[2][1]*o.f[1][m] + r5.Ti[2][2]*o.f[2][m] - β*o.w[1][m] - α*o.w[2][m]
			}
		}

		// solve linear system: the real and the complex systems write to disjoint vectors,
		// so they may run concurrently when not distributed and o.Pll is set
		o.nlinsol += 1
		var errR, errC error
		if !o.Distr && o.Pll {
			wg := new(sync.WaitGroup)
			wg.Add(2)
			go func() {
				errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
				wg.Done()
			}()
			go func() {
				errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
				wg.Done()
			}()
			wg.Wait()
		} else {
			errR = o.lsolR.SolveR(o.δw[0], o.v[0], false)
			errC = o.lsolC.SolveC(o.δw[1], o.δw[2], o.v[1], o.v[2], false)
		}

		// check for errors from linear solution (both messages are reported, one per line)
		if errR != nil || errC != nil {
			var errmsg string
			if errR != nil {
				errmsg += errR.Error()
			}
			if errC != nil {
				if errR != nil {
					errmsg += "\n"
				}
				errmsg += errC.Error()
			}
			err = errors.New(errmsg)
			return
		}

		// update w and z
		for m := 0; m < o.ndim; m++ {
			o.w[0][m] += o.δw[0][m]
			o.w[1][m] += o.δw[1][m]
			o.w[2][m] += o.δw[2][m]
			o.z[0][m] = r5.T[0][0]*o.w[0][m] + r5.T[0][1]*o.w[1][m] + r5.T[0][2]*o.w[2][m]
			o.z[1][m] = r5.T[1][0]*o.w[0][m] + r5.T[1][1]*o.w[1][m] + r5.T[1][2]*o.w[2][m]
			o.z[2][m] = r5.T[2][0]*o.w[0][m] + r5.T[2][1]*o.w[1][m] + r5.T[2][2]*o.w[2][m]
		}

		// rms norm of δw
		Lδw = 0.0
		for m := 0; m < o.ndim; m++ {
			Lδw += math.Pow(o.δw[0][m]/o.scal[m], 2.0) + math.Pow(o.δw[1][m]/o.scal[m], 2.0) + math.Pow(o.δw[2][m]/o.scal[m], 2.0)
		}
		Lδw = math.Sqrt(Lδw / float64(3*o.ndim))

		// check convergence: θ estimates the contraction rate from successive increments
		if it > 0 {
			thq = Lδw / oLδw
			if it == 1 {
				o.θ = thq
			} else {
				o.θ = math.Sqrt(thq * othq)
			}
			othq = thq
			if o.θ < 0.99 {
				o.η = o.θ / (1.0 - o.θ)
				// error predicted for the last allowed iteration
				iterr = Lδw * math.Pow(o.θ, float64(o.NmaxIt-o.nit)) / (1.0 - o.θ)
				itRerr = iterr / o.fnewt
				if itRerr >= 1.0 { // diverging
					qnewt = max(1.0e-4, min(20.0, itRerr))
					o.dvfac = 0.8 * math.Pow(qnewt, -1.0/(4.0+float64(o.NmaxIt)-1.0-float64(o.nit)))
					o.diverg = true
					break
				}
			} else { // diverging badly (unexpected step-rejection)
				o.dvfac = 0.5
				o.diverg = true
				break
			}
		}

		// save old norm
		oLδw = Lδw

		// converged
		if o.η*Lδw < o.fnewt {
			break
		}
	}

	// did not converge: the loop ended without a break, which leaves it == o.NmaxIt.
	// A break on the last allowed iteration (it == o.NmaxIt-1) is a regular outcome,
	// either convergence or divergence, and is handled below.
	if it == o.NmaxIt {
		chk.Panic("radau5_step failed with it=%d", it)
	}

	// diverging => stop
	if o.diverg {
		rerr = 2.0 // must leave state intact, any rerr is OK
		return
	}

	// error estimate
	if o.LerrStrat == 1 {

		// simple strategy => HW-VII p123 Eq.(8.17) (not good for stiff problems)
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.lerr[m] = r5.γ0*o.h*o.f0[m] + o.ez[m]
			rerr += math.Pow(o.lerr[m]/o.scal[m], 2.0)
		}
		rerr = max(math.Sqrt(rerr/float64(o.ndim)), 1.0e-10)
		return
	}

	// common: rhs := f0 + γ * M * ez
	if o.hasM {
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.rhs[m] = o.f0[m]
		}
		if o.Distr {
			la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
			mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
		} else {
			la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
		}
	} else {
		for m := 0; m < o.ndim; m++ {
			o.ez[m] = r5.e0*o.z[0][m] + r5.e1*o.z[1][m] + r5.e2*o.z[2][m]
			o.rhs[m] = o.f0[m] + γ*o.ez[m]
		}
	}

	// HW-VII p123 Eq.(8.19): shared by strategy 2 and by the remaining strategy
	err = o.lsolR.SolveR(o.lerr, o.rhs, false)
	if err != nil {
		return
	}
	rerr = o.rms_norm(o.lerr)

	// HW-VII p123 Eq.(8.20): second estimate, only on the first step or after a rejection,
	// and only if the first estimate is not below 1 (the negated test also catches NaN)
	if o.LerrStrat != 2 && !(rerr < 1.0) && (o.first || o.reject) {
		for m := 0; m < o.ndim; m++ {
			o.v[0][m] = y0[m] + o.lerr[m] // y0perr
		}
		o.nfeval += 1
		err = o.fcn(o.f[0], o.h, x0, o.v[0], args...) // f0perr
		if err != nil {
			return
		}
		if o.hasM {
			la.VecCopy(o.rhs, 1, o.f[0]) // rhs := f0perr
			if o.Distr {
				la.SpMatVecMul(o.δw[0], γ, o.mMat, o.ez)     // δw[0] = γ * M * ez (δw[0] is workspace)
				mpi.AllReduceSumAdd(o.rhs, o.δw[0], o.δw[1]) // rhs += join_with_sum(δw[0]) (δw[1] is workspace)
			} else {
				la.SpMatVecMulAdd(o.rhs, γ, o.mMat, o.ez) // rhs += γ * M * ez
			}
		} else {
			la.VecAdd2(o.rhs, 1, o.f[0], γ, o.ez) // rhs = f0perr + γ * ez
		}
		err = o.lsolR.SolveR(o.lerr, o.rhs, false)
		if err != nil {
			return
		}
		rerr = o.rms_norm(o.lerr)
	}
	return
}
Exemplo n.º 2
0
// Solve runs the interior-point iterations for the linear programming problem stored in o.
//
// It first builds a strictly shifted starting point (x, λ, s) from the problem data, then
// repeats, up to o.NmaxIt times: compute the residuals, stop when the relative gap between
// cᵀx and bᵀλ drops below o.Tol, otherwise solve the linearised system twice with the same
// factorisation (a first solve to choose the centring parameter σ, a second solve with the
// corrected residual) and update x, s and λ.
//
//  Input:
//   verbose -- print one line per iteration with the iteration number, cᵀx and the gap
//
//  Output:
//   err -- error from the linear solver, or a non-convergence error when the iteration
//          counter reaches o.NmaxIt
//
// NOTE(review): the code relies on o.Rx, o.Rl, o.Rs feeding o.R and on o.Mdx, o.Mdl, o.Mds
// reflecting o.Mdy (presumably sub-slices set up elsewhere) -- confirm against the allocator.
func (o *LinIpm) Solve(verbose bool) (err error) {

	// starting point
	aat := la.MatAlloc(o.Nl, o.Nl)       // A*Aᵀ
	aatInvB := make([]float64, o.Nl)     // inv(A*Aᵀ) * b
	ac := make([]float64, o.Nl)          // A * c
	la.SpMatMatTrMul(aat, 1, o.A)        // aat := A*Aᵀ
	la.SpMatVecMul(ac, 1, o.A, o.C)      // ac := A * c
	la.SPDsolve2(aatInvB, o.L, aat, o.B, ac) // aatInvB := inv(aat) * b  and  λ := inv(aat) * ac
	la.SpMatTrVecMul(o.X, 1, o.A, aatInvB)   // x := Aᵀ * aatInvB
	la.VecCopy(o.S, 1, o.C)                  // s := c
	la.SpMatTrVecMulAdd(o.S, -1, o.A, o.L)   // s -= Aᵀλ

	// first shift: move the smallest components of x and s up to at least zero
	lowX, lowS := o.X[0], o.S[0]
	for k := 1; k < o.Nx; k++ {
		lowX = min(lowX, o.X[k])
		lowS = min(lowS, o.S[k])
	}
	shiftX := max(-1.5*lowX, 0)
	shiftS := max(-1.5*lowS, 0)
	var dotXS, totalX, totalS float64
	for k := 0; k < o.Nx; k++ {
		o.X[k] += shiftX
		o.S[k] += shiftS
		dotXS += o.X[k] * o.S[k]
		totalX += o.X[k]
		totalS += o.S[k]
	}

	// second shift, based on the sums gathered above
	shiftX = 0.5 * dotXS / totalS
	shiftS = 0.5 * dotXS / totalX
	for k := 0; k < o.Nx; k++ {
		o.X[k] += shiftX
		o.S[k] += shiftS
	}

	// index offset of the third block (after the x and λ blocks) in the Jacobian
	off := o.Nx + o.Nl

	// table header
	if verbose {
		io.Pf("%3s%16s%16s\n", "it", "f(x)", "error")
	}

	// iterations; the counter outlives the loop for the final check
	var it int
	for it = 0; it < o.NmaxIt; it++ {

		// residuals, objective values cᵀx and bᵀλ, and average complementarity μ
		la.SpMatTrVecMul(o.Rx, 1, o.A, o.L) // rx := Aᵀλ
		la.SpMatVecMul(o.Rl, 1, o.A, o.X)   // rλ := A x
		var ctx, btl, μ float64
		for k := 0; k < o.Nx; k++ {
			o.Rx[k] += o.S[k] - o.C[k]
			o.Rs[k] = o.X[k] * o.S[k]
			ctx += o.C[k] * o.X[k]
			μ += o.X[k] * o.S[k]
		}
		for k := 0; k < o.Nl; k++ {
			o.Rl[k] -= o.B[k]
			btl += o.B[k] * o.L[k]
		}
		μ /= float64(o.Nx)

		// stop as soon as the relative gap is small enough
		gap := math.Abs(ctx-btl) / (1.0 + math.Abs(ctx))
		if verbose {
			io.Pf("%3d%16.8e%16.8e\n", it, la.VecDot(o.C, o.X), gap)
		}
		if gap < o.Tol {
			break
		}

		// Jacobian: A and Aᵀ blocks plus the diagonal entries 1, s_k and x_k
		o.J.Start()
		o.J.PutCCMatAndMatT(o.A)
		for k := 0; k < o.Nx; k++ {
			o.J.Put(k, off+k, 1.0)
			o.J.Put(off+k, k, o.S[k])
			o.J.Put(off+k, off+k, o.X[k])
		}

		// the solver is initialised once (non-symmetric, not verbose, no timing),
		// then refactorised at every iteration
		if it == 0 {
			if err = o.Lis.InitR(o.J, false, false, false); err != nil {
				return err
			}
		}
		if err = o.Lis.Fact(); err != nil {
			return err
		}

		// first solve
		if err = o.Lis.SolveR(o.Mdy, o.R, false); err != nil { // mdy := inv(J) * R
			return err
		}

		// step lengths of the first solve, resulting μ_affine and centring parameter σ
		ratioX, ratioS := o.calc_min_ratios() // min{ x_i / (-Δx_i) } and min{ s_i / (-Δs_i) }
		αpa := min(1, ratioX)
		αda := min(1, ratioS)
		μaff := 0.0
		for k := 0; k < o.Nx; k++ {
			μaff += (o.X[k] - αpa*o.Mdx[k]) * (o.S[k] - αda*o.Mds[k])
		}
		μaff /= float64(o.Nx)
		σ := math.Pow(μaff/μ, 3)

		// corrected residual
		for k := 0; k < o.Nx; k++ {
			o.Rs[k] += o.Mdx[k]*o.Mds[k] - σ*μ
		}

		// second solve, reusing the factorisation
		if err = o.Lis.SolveR(o.Mdy, o.R, false); err != nil { // mdy := inv(J) * R
			return err
		}

		// final step lengths, kept slightly inside the boundary by the factor 0.99
		ratioX, ratioS = o.calc_min_ratios()
		αpa = min(1, 0.99*ratioX)
		αda = min(1, 0.99*ratioS)

		// update x and s with their own step lengths; λ uses the same step length as s
		for k := 0; k < o.Nx; k++ {
			o.X[k] -= αpa * o.Mdx[k]
			o.S[k] -= αda * o.Mds[k]
		}
		for k := 0; k < o.Nl; k++ {
			o.L[k] -= αda * o.Mdl[k]
		}
	}

	// the loop ran to its limit without meeting the tolerance
	if it == o.NmaxIt {
		err = chk.Err("iterations did not converge")
	}
	return err
}