func (h *QuasiNewtonHelper) BackTrackingLineSearch(cost float64, pos *core.Vector, grad *core.Vector, dir *core.Vector, isInit bool) (nextCost float64, nextPos *core.Vector) {
	dotGradDir := grad.Dot(dir)
	if dotGradDir == 0 {
		return cost, pos
	}
	if dotGradDir > 0 {
		panic("BackTracking: to the opposite direction of grad")
	}

	alpha := 1.0
	backoff := 0.5
	if isInit {
		normDir := math.Sqrt(dir.Dot(dir))
		alpha = (1 / normDir)
		backoff = 0.1
	}

	var c1 float64 = 1e-4
	for cntItr := 0; cntItr <= MAX_BACKTRACKING_ITER; cntItr++ {
		nextPos = h.minimizer.NextPoint(pos, dir, alpha)
		nextCost = h.minimizer.Evaluate(nextPos)
		if nextCost <= cost+c1*dotGradDir*alpha {
			break
		}
		alpha *= backoff
	}
	return nextCost, nextPos
}
// Description: Update the dir from -grad to optimal direction
//              Dir will be modified directly
func (h *QuasiNewtonHelper) ApplyQuasiInverseHession(dir *core.Vector) {
	count := len(h.sList)
	if count == 0 {
		return
	}
	alphas := make([]float64, count, count)
	for n := count - 1; n >= 0; n-- {
		alphas[n] = -dir.Dot(h.sList[n]) / h.roList[n]
		dir.ApplyElemWiseMultiplyAccumulation(h.yList[n], alphas[n])
	}
	lastY := h.yList[count-1]
	yDotY := lastY.Dot(lastY)
	scalar := h.roList[count-1] / yDotY
	dir.ApplyScale(scalar)

	for n := 0; n < count; n++ {
		beta := dir.Dot(h.yList[n]) / h.roList[n]
		dir.ApplyElemWiseMultiplyAccumulation(h.sList[n], -alphas[n]-beta)
	}
	return
}
Example #3
0
/*
   Given matrix m and vector v, compute inv(m)*v.
   Based on Gibbs and MacKay 1997, and Mark N. Gibbs's PhD dissertation

   Details:
   A - positive seminidefinite matrix
   u - a vector
   theta - positive number
   C = A + I*theta
   Returns inv(C)*u - So you need the diagonal noise term for covariance matrix in a sense.
   However, this algorithm is numerically stable, the noise term can be very small and the inversion can still be calculated...
*/
func (algo *GaussianProcess) ApproximateInversion(A *core.Matrix, u *core.Vector, theta float64, dim int64) *core.Vector {
	max_itr := 500
	tol := 0.01

	C := core.NewMatrix()
	for key, val := range A.Data {
		C.Data[key] = val.Copy()
	}

	// Add theta to diagonal elements
	for i := int64(0); i < dim; i++ {
		_, ok := C.Data[i]
		if !ok {
			C.Data[i] = core.NewVector()
		}
		C.Data[i].Data[i] = C.Data[i].Data[i] + theta
	}

	var Q_l float64
	var Q_u float64
	var dQ float64
	u_norm := u.Dot(u) / 2

	// Lower bound
	y_l := core.NewVector()
	g_l := u.Copy()
	h_l := u.Copy()
	lambda_l := float64(0)
	gamma_l := float64(0)
	var tmp_f1 float64
	var tmp_f2 float64
	var tmp_v1 *core.Vector
	tmp_f1 = g_l.Dot(g_l)
	tmp_v1 = C.MultiplyVector(h_l)

	// Upper bound
	y_u := core.NewVector()
	g_u := u.Copy()
	h_u := u.Copy()
	lambda_u := float64(0)
	gamma_u := float64(0)
	var tmp_f3 float64
	var tmp_f4 float64
	var tmp_v3 *core.Vector
	var tmp_v4 *core.Vector
	tmp_v3 = g_u.MultiplyMatrix(A)
	tmp_v4 = C.MultiplyVector(h_u)
	tmp_f3 = tmp_v1.Dot(g_u)

	for i := 0; i < max_itr; i++ {
		// Lower bound
		lambda_l = tmp_f1 / h_l.Dot(tmp_v1)
		y_l.AddVector(h_l, lambda_l) //y_l next
		Q_l = y_l.Dot(u) - 0.5*(y_l.MultiplyMatrix(C)).Dot(y_l)

		// Upper bound
		lambda_u = tmp_f3 / tmp_v3.Dot(tmp_v4)
		y_u.AddVector(h_u, lambda_u) //y_u next
		Q_u = (y_u.MultiplyMatrix(A)).Dot(u) - 0.5*((y_u.MultiplyMatrix(C)).MultiplyMatrix(A)).Dot(y_u)

		dQ = (u_norm-Q_u)/theta - Q_l
		if dQ < tol {
			break
		}

		// Lower bound var updates
		g_l.AddVector(tmp_v1, -lambda_l) //g_l next
		tmp_f2 = g_l.Dot(g_l)
		gamma_l = tmp_f2 / tmp_f1
		for key, val := range h_l.Data {
			h_l.SetValue(key, val*gamma_l)
		}
		h_l.AddVector(g_l, 1)          //h_l next
		tmp_f1 = tmp_f2                //tmp_f1 next
		tmp_v1 = C.MultiplyVector(h_l) //tmp_v1 next

		// Upper bound var updates
		g_u.AddVector(tmp_v4, -lambda_u) //g_u next
		tmp_v3 = g_u.MultiplyMatrix(A)   //tmp_v3 next
		tmp_f4 = tmp_v3.Dot(g_u)
		gamma_u = tmp_f4 / tmp_f3
		for key, val := range h_u.Data {
			h_u.SetValue(key, val*gamma_u)
		}
		h_u.AddVector(g_u, 1)          //h_u next
		tmp_v4 = C.MultiplyVector(h_u) //tmp_v4 next
		tmp_f3 = tmp_f4                // tmp_f3 next
	}

	return y_l
}