Example #1
func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion as described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.

	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}
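For reference, here is a self-contained sketch of the same two-loop recursion on plain slices, without the history ring buffer or the floats package; the helper names dot and twoLoop are illustrative and not part of the package above.

func dot(a, b []float64) float64 {
	var sum float64
	for i, v := range a {
		sum += v * b[i]
	}
	return sum
}

// twoLoop computes dir = -H_k^{-1} * g via the L-BFGS two-loop recursion
// (Nocedal, Wright, Algorithm 7.4). s and y hold the last m steps and
// gradient differences, ordered oldest to newest; g is the current gradient.
func twoLoop(g []float64, s, y [][]float64) []float64 {
	m := len(s)
	dir := append([]float64(nil), g...)
	if m == 0 {
		// No history yet: fall back to steepest descent.
		for j := range dir {
			dir[j] = -dir[j]
		}
		return dir
	}
	alpha := make([]float64, m)
	rho := make([]float64, m)
	for i := range s {
		rho[i] = 1 / dot(s[i], y[i])
	}
	// First loop: most recent to oldest.
	for i := m - 1; i >= 0; i-- {
		alpha[i] = rho[i] * dot(s[i], dir)
		for j := range dir {
			dir[j] -= alpha[i] * y[i][j]
		}
	}
	// Scale by the initial inverse Hessian approximation gamma * I.
	gamma := dot(s[m-1], y[m-1]) / dot(y[m-1], y[m-1])
	for j := range dir {
		dir[j] *= gamma
	}
	// Second loop: oldest to most recent.
	for i := 0; i < m; i++ {
		beta := rho[i] * dot(y[i], dir)
		for j := range dir {
			dir[j] += (alpha[i] - beta) * s[i][j]
		}
	}
	// Negate so the result is a descent direction for minimization.
	for j := range dir {
		dir[j] = -dir[j]
	}
	return dir
}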
Example #2
func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	// Update direction. Uses the two-loop recursion as described in
	// Nocedal, Wright (2006), Numerical Optimization (2nd ed.). Chapter 7, page 178.
	copy(dir, loc.Gradient)
	floats.SubTo(l.y, loc.Gradient, l.grad)
	floats.SubTo(l.s, loc.X, l.x)
	copy(l.sHist[l.oldest], l.s)
	copy(l.yHist[l.oldest], l.y)
	sDotY := floats.Dot(l.y, l.s)
	l.rhoHist[l.oldest] = 1 / sDotY

	l.oldest++
	l.oldest = l.oldest % l.Store
	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)

	// Two-loop update. The first loop starts with the most recent element
	// and goes backward, the second starts with the oldest element and goes
	// forward. At the end dir contains H^-1 * g, so flip the direction for
	// minimization.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rhoHist[idx] * floats.Dot(l.sHist[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.yHist[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(l.y, l.y)
	floats.Scale(gamma, dir)

	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rhoHist[idx] * floats.Dot(l.yHist[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.sHist[idx])
	}
	floats.Scale(-1, dir)

	return 1
}
Example #3
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			d := mat64.NewVector(dim, dir)
			// Store the solution in d's backing array, dir.
			d.SolveCholeskyVec(&n.chol, mat64.NewVector(dim, loc.Gradient))
			floats.Scale(-1, dir)
			return 1
		}
		// Modified Hessian is not PD, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to get a PD matrix. Return the negative
	// gradient as the descent direction.
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1
}
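In equation form, each pass through the loop above attempts to solve the shifted Newton system (a sketch using the same quantities as the code, where \rho corresponds to n.Increase and 10^{-3} to the 0.001 constant):

	(\nabla^2 f(x_k) + \tau_k I)\, p_k = -\nabla f(x_k), \qquad \tau_k \leftarrow \max(\rho\,\tau_k,\; 10^{-3}) \text{ while the factorization fails.}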
Example #4
func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim

	b.x = resize(b.x, dim)
	copy(b.x, loc.X)
	b.grad = resize(b.grad, dim)
	copy(b.grad, loc.Gradient)

	b.y = resize(b.y, dim)
	b.s = resize(b.s, dim)
	b.tmp = resize(b.tmp, dim)
	b.yVec = mat64.NewVector(dim, b.y)
	b.sVec = mat64.NewVector(dim, b.s)
	b.tmpVec = mat64.NewVector(dim, b.tmp)

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat64.NewSymDense(dim, nil)
	} else {
		b.invHess = mat64.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}

	// The values of the inverse Hessian are initialized in the first call to NextDirection.

	// The initial direction is just the negative of the gradient because the
	// initial Hessian is the identity.
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)

	b.first = true

	return 1 / floats.Norm(dir, 2)
}
Example #5
// locationsAsy computes the node locations and weights of a Hermite quadrature
// rule with len(x) points, storing them in x and w.
func (h Hermite) locationsAsy(x, w []float64) {
	// A. Townsend, T. Trogdon, and S. Olver, Fast computation of Gauss quadrature
	// nodes and weights on the whole real line, IMA J. Numer. Anal.,
	// 36: 337–358, 2016. http://arxiv.org/abs/1410.5286

	// Find the positive locations and weights.
	n := len(x)
	l := n / 2
	xa := x[l:]
	wa := w[l:]
	for i := range xa {
		xa[i], wa[i] = h.locationsAsy0(i, n)
	}
	// Flip around zero -- copy the negative x locations with the corresponding
	// weights.
	if n%2 == 0 {
		l--
	}
	for i, v := range xa {
		x[l-i] = -v
	}
	for i, v := range wa {
		w[l-i] = v
	}
	sumW := floats.Sum(w)
	c := math.SqrtPi / sumW
	floats.Scale(c, w)
}
Example #6
func TestCategoricalCDF(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0, 4},
	} {
		c := make([]float64, len(test))
		copy(c, test)
		floats.Scale(1/floats.Sum(c), c)
		sum := make([]float64, len(test))
		floats.CumSum(sum, c)

		dist := NewCategorical(test, nil)
		cdf := dist.CDF(-0.5)
		if cdf != 0 {
			t.Errorf("CDF of negative number not zero")
		}
		for i := range c {
			cdf := dist.CDF(float64(i))
			if math.Abs(cdf-sum[i]) > 1e-14 {
				t.Errorf("CDF mismatch %v. Want %v, got %v.", float64(i), sum[i], cdf)
			}
			cdfp := dist.CDF(float64(i) + 0.5)
			if cdfp != cdf {
				t.Errorf("CDF mismatch for non-integer input")
			}
		}
	}
}
Example #7
func TestCategoricalProb(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0},
	} {
		dist := NewCategorical(test, nil)
		norm := make([]float64, len(test))
		floats.Scale(1/floats.Sum(norm), norm)
		for i, v := range norm {
			p := dist.Prob(float64(i))
			if math.Abs(p-v) > 1e-14 {
				t.Errorf("Probability mismatch element %d", i)
			}
			p = dist.Prob(float64(i) + 0.5)
			if p != 0 {
				t.Errorf("Non-zero probability for non-integer x")
			}
		}
		p := dist.Prob(-1)
		if p != 0 {
			t.Errorf("Non-zero probability for -1")
		}
		p = dist.Prob(float64(len(test)))
		if p != 0 {
			t.Errorf("Non-zero probability for len(test)")
		}
	}
}
Example #8
// FuncGrad is not callable in parallel because the batches are shared between calls.
func (g *GradOptimizable) FuncGrad(params []float64, deriv []float64) float64 {
	inds := g.Sampler.Iterate()
	total := len(inds)

	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}

	// Send the regularizer
	g.batches[0].parameters = params
	g.regularizeChan <- g.batches[0]

	// Send initial batches out
	var initBatches int
	var lastSent int
	for i := 0; i < g.NumWorkers; i++ {
		if lastSent == total {
			break
		}
		add := g.grainSize
		if lastSent+add >= total {
			add = total - lastSent
		}
		initBatches++
		g.batches[i+1].idxs = inds[lastSent : lastSent+add]
		g.batches[i+1].parameters = params
		g.sendWork <- g.batches[i+1]
		lastSent += add
	}

	// Collect the batches and resend out
	for lastSent < total {
		batch := <-g.receiveWork
		totalLoss += batch.loss
		floats.Add(deriv, batch.deriv)
		add := g.grainSize
		if lastSent+add >= total {
			add = total - lastSent
		}
		batch.idxs = inds[lastSent : lastSent+add]
		g.sendWork <- batch
		lastSent += add
	}

	// All inds have been sent, so just wait for the remaining batches to be collected.
	for i := 0; i < initBatches; i++ {
		batch := <-g.receiveWork
		totalLoss += batch.loss
		floats.Add(deriv, batch.deriv)
	}
	batch := <-g.regDone
	totalLoss += batch.loss
	floats.Add(deriv, batch.deriv)

	totalLoss /= float64(len(inds))
	floats.Scale(1/float64(len(inds)), deriv)
	return totalLoss
}
Example #9
// returnNext updates the location based on the iteration type and the current
// simplex, and returns the next operation.
func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) {
	n.lastIter = iter
	switch iter {
	case nmMajor:
		// Fill loc with the current best point and value,
		// and command a convergence check.
		copy(loc.X, n.vertices[0])
		loc.F = n.values[0]
		return MajorIteration, nil
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		dim := len(loc.X)
		floats.SubTo(loc.X, n.centroid, n.vertices[dim])
		floats.Scale(scale, loc.X)
		floats.Add(loc.X, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, loc.X)
		}
		return FuncEvaluation, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, loc.X)
		floats.Add(loc.X, n.vertices[0])
		return FuncEvaluation, nil
	default:
		panic("unreachable")
	}
}
Example #10
// ObjGrad computes the objective value and stores the derivative in place.
func (g *BatchGradBased) ObjGrad(parameters []float64, derivative []float64) (loss float64) {
	c := make(chan lossDerivStruct, 10)

	// Set the channel for parallel for
	f := func(start, end int) {
		g.lossDerivFunc(start, end, c, parameters)
	}

	go func() {
		wg := &sync.WaitGroup{}
		// Compute the losses and the derivatives all in parallel
		wg.Add(2)
		go func() {
			common.ParallelFor(g.nTrain, g.grainSize, f)
			wg.Done()
		}()
		// Compute the regularization
		go func() {
			deriv := make([]float64, g.nParameters)
			loss := g.regularizer.LossDeriv(parameters, deriv)
			//fmt.Println("regularizer loss = ", loss)
			//fmt.Println("regularizer deriv = ", deriv)
			c <- lossDerivStruct{
				loss:  loss,
				deriv: deriv,
			}
			wg.Done()
		}()
		// Wait for all of the results to be sent on the channel
		wg.Wait()
		// Close the channel
		close(c)
	}()
	// zero the derivative
	for i := range derivative {
		derivative[i] = 0
	}

	// Range over the channel, incrementing the loss and derivative
	// as they come in
	for l := range c {
		loss += l.loss
		floats.Add(derivative, l.deriv)
	}
	//fmt.Println("nTrain", g.nTrain)
	//fmt.Println("final deriv", derivative)
	// Normalize by the number of training samples
	loss /= float64(g.nTrain)
	floats.Scale(1/float64(g.nTrain), derivative)

	return loss
}
Example #11
// UpdateOne updates sufficient statistics using one observation.
func (g *Model) UpdateOne(o model.Obs, w float64) {

	glog.V(6).Infof("gaussian update, name:%s, obs:%v, weight:%e", g.ModelName, o, w)

	/* Update sufficient statistics. */
	obs, _, _ := model.ObsToF64(o)
	floatx.Apply(floatx.ScaleFunc(w), obs, g.tmpArray)
	floats.Add(g.Sumx, g.tmpArray)
	floatx.Sq(g.tmpArray, obs)
	floats.Scale(w, g.tmpArray)
	floats.Add(g.Sumxsq, g.tmpArray)
	g.NSamples += w
}
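Written out, the update above accumulates the weighted first and second moments elementwise, with x the observation vector:

	\mathrm{Sumx} \leftarrow \mathrm{Sumx} + w\,x, \qquad \mathrm{Sumxsq} \leftarrow \mathrm{Sumxsq} + w\,x^2, \qquad \mathrm{NSamples} \leftarrow \mathrm{NSamples} + w.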
Example #12
func sampleCategorical(t *testing.T, dist Categorical, nSamples int) []float64 {
	counts := make([]float64, dist.Len())
	for i := 0; i < nSamples; i++ {
		v := dist.Rand()
		if float64(int(v)) != v {
			t.Fatalf("Random number is not an integer")
		}
		counts[int(v)]++
	}
	sum := floats.Sum(counts)
	floats.Scale(1/sum, counts)
	return counts
}
Example #13
func TestJensenShannon(t *testing.T) {
	for i, test := range []struct {
		p []float64
		q []float64
	}{
		{
			p: []float64{0.5, 0.1, 0.3, 0.1},
			q: []float64{0.1, 0.4, 0.25, 0.25},
		},
		{
			p: []float64{0.4, 0.6, 0.0},
			q: []float64{0.2, 0.2, 0.6},
		},
		{
			p: []float64{0.1, 0.1, 0.0, 0.8},
			q: []float64{0.6, 0.3, 0.0, 0.1},
		},
		{
			p: []float64{0.5, 0.1, 0.3, 0.1},
			q: []float64{0.5, 0, 0.25, 0.25},
		},
		{
			p: []float64{0.5, 0.1, 0, 0.4},
			q: []float64{0.1, 0.4, 0.25, 0.25},
		},
	} {

		m := make([]float64, len(test.p))
		p := test.p
		q := test.q
		floats.Add(m, p)
		floats.Add(m, q)
		floats.Scale(0.5, m)

		js1 := 0.5*KullbackLeibler(p, m) + 0.5*KullbackLeibler(q, m)
		js2 := JensenShannon(p, q)

		if math.IsNaN(js2) {
			t.Errorf("In case %v, JS distance is NaN", i)
		}

		if math.Abs(js1-js2) > 1e-14 {
			t.Errorf("JS mismatch case %v. Expected %v, found %v.", i, js1, js2)
		}
	}
	if !Panics(func() { JensenShannon(make([]float64, 3), make([]float64, 2)) }) {
		t.Errorf("JensenShannon did not panic with p, q length mismatch")
	}
}
Example #14
// Explicitly forms vectors and computes normalized dot product.
func cosCorrMultiNaive(f, g *rimg64.Multi) *rimg64.Image {
	h := rimg64.New(f.Width-g.Width+1, f.Height-g.Height+1)
	n := g.Width * g.Height * g.Channels
	a := make([]float64, n)
	b := make([]float64, n)
	for i := 0; i < h.Width; i++ {
		for j := 0; j < h.Height; j++ {
			a = a[:0]
			b = b[:0]
			for u := 0; u < g.Width; u++ {
				for v := 0; v < g.Height; v++ {
					for p := 0; p < g.Channels; p++ {
						a = append(a, f.At(i+u, j+v, p))
						b = append(b, g.At(u, v, p))
					}
				}
			}
			floats.Scale(1/floats.Norm(a, 2), a)
			floats.Scale(1/floats.Norm(b, 2), b)
			h.Set(i, j, floats.Dot(a, b))
		}
	}
	return h
}
Example #15
// returnNext finds the next location to evaluate, stores the location in xNext,
// and returns the evaluation and iteration types for that location.
func (n *NelderMead) returnNext(iter nmIterType, xNext []float64) (EvaluationType, IterationType, error) {
	dim := len(xNext)
	n.lastIter = iter
	switch iter {
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		floats.SubTo(xNext, n.centroid, n.vertices[dim])
		floats.Scale(scale, xNext)
		floats.Add(xNext, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, xNext)
			// Nelder-Mead iterations start with a reflection step.
			return FuncEvaluation, MajorIteration, nil
		}
		return FuncEvaluation, MinorIteration, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(xNext, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, xNext)
		floats.Add(xNext, n.vertices[0])
		return FuncEvaluation, SubIteration, nil
	default:
		panic("unreachable")
	}
}
Example #16
func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim

	if l.Store == 0 {
		l.Store = 15
	}

	l.oldest = l.Store - 1 // the first vector will be put in at 0

	l.x = resize(l.x, dim)
	l.grad = resize(l.grad, dim)
	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)

	l.y = resize(l.y, dim)
	l.s = resize(l.s, dim)
	l.a = resize(l.a, l.Store)
	l.rhoHist = resize(l.rhoHist, l.Store)

	if cap(l.yHist) < l.Store {
		n := make([][]float64, l.Store-cap(l.yHist))
		l.yHist = append(l.yHist, n...)
	}
	if cap(l.sHist) < l.Store {
		n := make([][]float64, l.Store-cap(l.sHist))
		l.sHist = append(l.sHist, n...)
	}
	l.yHist = l.yHist[:l.Store]
	l.sHist = l.sHist[:l.Store]
	for i := range l.sHist {
		l.sHist[i] = resize(l.sHist[i], dim)
		for j := range l.sHist[i] {
			l.sHist[i][j] = 0
		}
	}
	for i := range l.yHist {
		l.yHist[i] = resize(l.yHist[i], dim)
		for j := range l.yHist[i] {
			l.yHist[i][j] = 0
		}
	}

	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)

	return 1 / floats.Norm(dir, 2)
}
Example #17
// PrincipalComponents returns the principal component direction vectors and
// the column variances of the principal component scores, a * vecs, computed
// using the singular value decomposition of the input. The input a is an n×d
// matrix where each row is an observation and each column represents a variable.
//
// PrincipalComponents centers the variables but does not scale the variance.
//
// The slice weights is used to weight the observations. If weights is nil,
// each weight is considered to have a value of one, otherwise the length of
// weights must match the number of observations or PrincipalComponents will
// panic.
//
// On successful completion, the principal component direction vectors are
// returned in vecs as a d×min(n, d) matrix, and the variances are returned in
// vars as a min(n, d)-long slice in descending sort order.
//
// If no singular value decomposition is possible, vecs and vars are returned
// nil and ok is returned false.
func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) {
	n, d := a.Dims()
	if weights != nil && len(weights) != n {
		panic("stat: len(weights) != observations")
	}

	centered := mat64.NewDense(n, d, nil)
	col := make([]float64, n)
	for j := 0; j < d; j++ {
		mat64.Col(col, j, a)
		floats.AddConst(-Mean(col, weights), col)
		centered.SetCol(j, col)
	}
	for i, w := range weights {
		floats.Scale(math.Sqrt(w), centered.RawRowView(i))
	}

	kind := matrix.SVDFull
	if n > d {
		kind = matrix.SVDThin
	}
	var svd mat64.SVD
	ok = svd.Factorize(centered, kind)
	if !ok {
		return nil, nil, false
	}

	vecs = &mat64.Dense{}
	vecs.VFromSVD(&svd)
	if n < d {
		// Don't retain columns that are not valid direction vectors.
		vecs.Clone(vecs.View(0, 0, d, n))
	}
	vars = svd.Values(nil)
	var f float64
	if weights == nil {
		f = 1 / float64(n-1)
	} else {
		f = 1 / (floats.Sum(weights) - 1)
	}
	for i, v := range vars {
		vars[i] = f * v * v
	}
	return vecs, vars, true
}
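A minimal usage sketch, assuming mat64 and fmt are imported into the same package; the data values are purely illustrative.

func ExamplePrincipalComponents() {
	// Three observations of two variables.
	a := mat64.NewDense(3, 2, []float64{
		2.5, 2.4,
		0.5, 0.7,
		2.2, 2.9,
	})
	vecs, vars, ok := PrincipalComponents(a, nil)
	if !ok {
		fmt.Println("SVD failed")
		return
	}
	d, k := vecs.Dims()
	// vecs is d×min(n,d) and vars holds one variance per component.
	fmt.Println(d, k, len(vars))
}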
Example #18
// StdDevBatch predicts the standard deviation at the set of locations x.
func (g *GP) StdDevBatch(std []float64, x mat64.Matrix) []float64 {
	r, c := x.Dims()
	if c != g.inputDim {
		panic(badInputLength)
	}
	if std == nil {
		std = make([]float64, r)
	}
	if len(std) != r {
		panic(badStorage)
	}
	// For a single point, the variance is
	// 		sigma^2 = k(x,x) - k_*^T * K^-1 * k_*
	// where k_* is the vector of kernels between the training points and the query point.
	// For many points, the formula is:
	// 		nu_* = k(x_*, x_*) - k_*^T * K^-1 * k_*
	// This creates the full covariance matrix which is an rxr matrix. However,
	// the standard deviations are just the diagonal of this matrix. Instead, be
	// smart about it and compute the diagonal terms one at a time.
	kStar := g.formKStar(x)
	var tmp mat64.Dense
	tmp.SolveCholesky(g.cholK, kStar)

	// set k(x_*, x_*) into std then subtract k_*^T K^-1 k_* , computed one row at a time
	var tmp2 mat64.Vector
	row := make([]float64, c)
	for i := range std {
		for k := 0; k < c; k++ {
			row[k] = x.At(i, k)
		}
		std[i] = g.kernel.Distance(row, row)
		tmp2.MulVec(kStar.ColView(i).T(), tmp.ColView(i))
		rt, ct := tmp2.Dims()
		if rt != 1 && ct != 1 {
			panic("bad size")
		}
		std[i] -= tmp2.At(0, 0)
		std[i] = math.Sqrt(std[i])
	}
	// Need to scale the standard deviation to be in the same units as y.
	floats.Scale(g.std, std)
	return std
}
Example #19
func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim
	l.oldest = 0

	l.a = resize(l.a, l.Store)
	l.rho = resize(l.rho, l.Store)
	l.y = l.initHistory(l.y)
	l.s = l.initHistory(l.s)

	l.x = resize(l.x, dim)
	copy(l.x, loc.X)

	l.grad = resize(l.grad, dim)
	copy(l.grad, loc.Gradient)

	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1 / floats.Norm(dir, 2)
}
Example #20
func MakeFitLinScale(targetImage *imgut.Image) func(*imgut.Image) float64 {
	// Pre-compute image to slice of floats
	dataTarg := imgut.ToSlice(targetImage)
	// Pre-compute average
	avgt := floats.Sum(dataTarg) / float64(len(dataTarg))
	return func(indImage *imgut.Image) float64 {
		// Images to vector
		dataInd := imgut.ToSlice(indImage)
		// Compute average pixels
		avgy := floats.Sum(dataInd) / float64(len(dataInd))
		// Difference y - avgy
		y_avgy := make([]float64, len(dataInd))
		copy(y_avgy, dataInd)
		floats.AddConst(-avgy, y_avgy)
		// Difference t - avgt
		t_avgt := make([]float64, len(dataTarg))
		copy(t_avgt, dataTarg)
		floats.AddConst(-avgt, t_avgt)
		// Multiplication (t - avgt)(y - avgy)
		floats.Mul(t_avgt, y_avgy)
		// Summation
		numerator := floats.Sum(t_avgt)
		// Square (y - avgy)^2
		floats.Mul(y_avgy, y_avgy)
		denomin := floats.Sum(y_avgy)
		// Compute b-value
		b := numerator / denomin
		// Compute a-value
		a := avgt - b*avgy

		// Compute now the scaled RMSE, using y' = a + b*y
		floats.Scale(b, dataInd)      // b*y
		floats.AddConst(a, dataInd)   // a + b*y
		floats.Sub(dataInd, dataTarg) // (a + b * y - t)
		floats.Mul(dataInd, dataInd)  // (a + b * y - t)^2
		total := floats.Sum(dataInd)  // Sum(...)
		return math.Sqrt(total / float64(len(dataInd)))
	}
}
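The slope and intercept computed above are the least-squares linear scaling of the individual's pixels y onto the target t; in equation form, the closure evaluates

	b = \frac{\sum_i (t_i - \bar t)(y_i - \bar y)}{\sum_i (y_i - \bar y)^2}, \qquad a = \bar t - b\,\bar y, \qquad \mathrm{RMSE} = \sqrt{\tfrac{1}{N}\sum_i (a + b\,y_i - t_i)^2}.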
Example #21
// computeMove computes how far the entering variable (nonBasicIdx[minIdx]) can be
// moved before each basic variable reaches zero. The results are stored into move.
func computeMove(move []float64, minIdx int, A mat64.Matrix, ab *mat64.Dense, xb []float64, nonBasicIdx []int) error {
	// Find ae.
	col := mat64.Col(nil, nonBasicIdx[minIdx], A)
	aCol := mat64.NewVector(len(col), col)

	// d = - Ab^-1 Ae
	nb, _ := ab.Dims()
	d := make([]float64, nb)
	dVec := mat64.NewVector(nb, d)
	err := dVec.SolveVec(ab, aCol)
	if err != nil {
		return ErrLinSolve
	}
	floats.Scale(-1, d)

	for i, v := range d {
		if math.Abs(v) < dRoundTol {
			d[i] = 0
		}
	}

	// If no di < 0, then problem is unbounded.
	if floats.Min(d) >= 0 {
		return ErrUnbounded
	}

	// move = bhat_i / - d_i, assuming d is negative.
	bHat := xb // ab^-1 b
	for i, v := range d {
		if v >= 0 {
			move[i] = math.Inf(1)
		} else {
			move[i] = bHat[i] / math.Abs(v)
		}
	}
	return nil
}
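In symbols, this is the ratio test of the revised simplex method (a sketch; A_b is the basic matrix ab, A_e the entering column, and \hat b = A_b^{-1} b corresponds to xb):

	d = -A_b^{-1} A_e, \qquad \mathrm{move}_i = \begin{cases} \hat b_i / (-d_i) & d_i < 0 \\ +\infty & d_i \ge 0. \end{cases}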
Example #22
func (g *GP) marginalLikelihoodDerivative(x, grad []float64, trainNoise bool, mem *margLikeMemory) {
	// d/dTheta_j log[p(y|X,theta)] =
	//		1/2 * y^T * K^-1 * dK/dTheta_j * K^-1 * y - 1/2 * tr(K^-1 * dK/dTheta_j)
	//	  = 1/2 * α^T * dK/dTheta_j * α - 1/2 * tr(K^-1 * dK/dTheta_j)
	// Multiplying by -2 gives
	//		-α^T * dK/dTheta_j * α + tr(K^-1 * dK/dTheta_j)
	// This first computation is an inner product.
	n := len(g.outputs)
	nHyper := g.kernel.NumHyper()
	k := mem.k
	chol := mem.chol
	alpha := mem.alpha
	dKdTheta := mem.dKdTheta
	kInvDK := mem.kInvDK

	y := mat64.NewVector(n, g.outputs)

	var noise float64
	if trainNoise {
		noise = math.Exp(x[len(x)-1])
	} else {
		noise = g.noise
	}

	// If x is the same, then reuse what has been computed in the function.
	if !floats.Equal(mem.lastX, x) {
		copy(mem.lastX, x)
		g.kernel.SetHyper(x[:nHyper])
		g.setKernelMat(k, noise)
		//chol.Cholesky(k, false)
		chol.Factorize(k)
		alpha.SolveCholeskyVec(chol, y)
	}
	g.setKernelMatDeriv(dKdTheta, trainNoise, noise)
	for i := range dKdTheta {
		kInvDK.SolveCholesky(chol, dKdTheta[i])
		inner := mat64.Inner(alpha, dKdTheta[i], alpha)
		grad[i] = -inner + mat64.Trace(kInvDK)
	}
	floats.Scale(1/float64(n), grad)

	bounds := g.kernel.Bounds()
	if trainNoise {
		bounds = append(bounds, Bound{minLogNoise, maxLogNoise})
	}
	barrierGrad := make([]float64, len(grad))
	for i, v := range x {
		// Quadratic barrier penalty.
		if v < bounds[i].Min {
			diff := bounds[i].Min - v
			barrierGrad[i] = -(barrierPow) * math.Pow(diff, barrierPow-1)
		}
		if v > bounds[i].Max {
			diff := v - bounds[i].Max
			barrierGrad[i] = (barrierPow) * math.Pow(diff, barrierPow-1)
		}
	}
	fmt.Println("noise, minNoise", x[len(x)-1], bounds[len(x)-1].Min)
	fmt.Println("barrier Grad", barrierGrad)
	floats.Add(grad, barrierGrad)
	//copy(grad, barrierGrad)
}
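For reference, the standard form of the gradient assembled above, with \alpha = K^{-1} y, is

	\frac{\partial}{\partial \theta_j} \log p(y \mid X, \theta) = \tfrac{1}{2}\,\alpha^\top \frac{\partial K}{\partial \theta_j}\,\alpha - \tfrac{1}{2}\,\operatorname{tr}\!\left(K^{-1} \frac{\partial K}{\partial \theta_j}\right),

and the code stores -2 times this quantity (further scaled by 1/n) before adding the barrier gradient.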
Example #23
func (lbfgs *Lbfgs) Iterate(loc *multi.Location, obj *uni.Objective, grad *multi.Gradient, fun optimize.MultiObjGrad) (status.Status, error) {
	counter := lbfgs.counter
	q := lbfgs.q
	a := lbfgs.a
	b := lbfgs.b
	rhoHist := lbfgs.rhoHist
	sHist := lbfgs.sHist
	yHist := lbfgs.yHist
	gamma_k := lbfgs.gamma_k
	tmp := lbfgs.tmp
	p_k := lbfgs.p_k
	s_k := lbfgs.s_k
	y_k := lbfgs.y_k
	z := lbfgs.z

	// Calculate search direction
	for i, val := range grad.Curr() {
		q[i] = val
	}
	for i := counter - 1; i >= 0; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		floats.Sub(q, tmp)
	}
	for i := lbfgs.NumStore - 1; i >= counter; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		//fmt.Println(q)
		//fmt.Println(tmp)
		floats.Sub(q, tmp)
	}

	// Assume H_0 is the identity times gamma_k
	copy(z, q)
	floats.Scale(gamma_k, z)
	// Second loop for update, going oldest to newest
	for i := counter; i < lbfgs.NumStore; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}
	for i := 0; i < counter; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}

	lbfgs.a = a
	lbfgs.b = b

	copy(p_k, z)
	floats.Scale(-1, p_k)
	normP_k := floats.Norm(p_k, 2)

	// Perform line search -- need to find some way to implement this, especially bookkeeping function values
	linesearchResult, err := linesearch.Linesearch(fun, lbfgs.LinesearchMethod, lbfgs.LinesearchSettings, lbfgs.Wolfe, p_k, loc.Curr(), obj.Curr(), grad.Curr())

	// In the future add a check to switch to a different linesearcher?
	if err != nil {
		return status.LinesearchFailure, err
	}
	x_kp1 := linesearchResult.Loc
	f_kp1 := linesearchResult.Obj
	g_kp1 := linesearchResult.Grad
	alpha_k := linesearchResult.Step

	// Update hessian estimate
	copy(s_k, p_k)
	floats.Scale(alpha_k, s_k)

	copy(y_k, g_kp1)
	floats.Sub(y_k, grad.Curr())
	skDotYk := floats.Dot(s_k, y_k)

	// Bookkeep the results
	stepSize := alpha_k * normP_k
	lbfgs.step.AddToHist(stepSize)
	lbfgs.step.SetCurr(stepSize)
	loc.SetCurr(x_kp1)
	//lbfgs.loc.AddToHist(x_kp1)

	//fmt.Println(lbfgs.loc.GetHist())
	obj.SetCurr(f_kp1)
	grad.SetCurr(g_kp1)

	copy(sHist[counter], s_k)
	copy(yHist[counter], y_k)
	rhoHist[counter] = 1 / skDotYk

	lbfgs.gamma_k = skDotYk / floats.Dot(y_k, y_k)

	lbfgs.counter += 1
	if lbfgs.counter == lbfgs.NumStore {
		lbfgs.counter = 0
	}
	return status.Continue, nil
}
Example #24
func (b *BatchGradient) funcGrad(params, deriv []float64) float64 {
	nParameters := len(deriv)

	// Send out all of the work
	done := make(chan result)
	sz := b.nSamples / b.Workers
	sent := 0
	for i := 0; i < b.Workers; i++ {
		outputDim := b.outputDim
		last := sent + sz
		if i == b.Workers-1 {
			last = b.nSamples
		}
		go func(sent, last int) {
			lossDeriver := b.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			tmpderiv := make([]float64, nParameters)
			var totalLoss float64
			for i := sent; i < last; i++ {
				lossDeriver.Predict(params, b.features.RawRowView(i), predOutput)
				b.Outputs.Row(outputs, i)
				loss := b.Losser.LossDeriv(predOutput, outputs, dLossDPred)
				if b.Weights == nil {
					totalLoss += loss
				} else {
					totalLoss += b.Weights[i] * loss
				}
				lossDeriver.Deriv(params, b.features.RawRowView(i), predOutput, dLossDPred, dLossDParam)
				if b.Weights != nil {
					floats.Scale(b.Weights[i], dLossDParam)
				}
				floats.Add(tmpderiv, dLossDParam)
			}
			done <- result{totalLoss, tmpderiv}
		}(sent, last)
		sent += sz
	}
	// Collect all the results
	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}
	for i := 0; i < b.Workers; i++ {
		w := <-done
		totalLoss += w.loss
		floats.Add(deriv, w.deriv)
	}
	// Compute the regularizer
	if b.Regularizer != nil {
		tmp := make([]float64, nParameters)
		totalLoss += b.Regularizer.LossDeriv(params, tmp)
		floats.Add(deriv, tmp)
	}
	sumWeights := float64(b.nSamples)
	if b.Weights != nil {
		sumWeights = floats.Sum(b.Weights)
	}
	totalLoss /= sumWeights
	floats.Scale(1/sumWeights, deriv)
	return totalLoss
}
Example #25
func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}

	// Compute the gradient difference in the last step
	// y = g_{k+1} - g_{k}
	floats.SubTo(b.y, loc.Gradient, b.grad)

	// Compute the step difference
	// s = x_{k+1} - x_{k}
	floats.SubTo(b.s, loc.X, b.x)

	sDotY := floats.Dot(b.s, b.y)
	sDotYSquared := sDotY * sDotY

	if b.first {
		// Rescale the initial hessian.
		// From: Numerical optimization, Nocedal and Wright, Page 143, Eq. 6.20 (second edition).
		yDotY := floats.Dot(b.y, b.y)
		scale := sDotY / yDotY
		for i := 0; i < len(loc.X); i++ {
			for j := 0; j < len(loc.X); j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	// Compute the update rule
	//     B_{k+1}^-1
	// First term is just the existing inverse hessian
	// Second term is
	//     (sk^T yk + yk^T B_k^-1 yk) (sk sk^T) / (sk^T yk)^2
	// Third term is
	//     B_k^-1 yk sk^T + sk yk^T B_k^-1
	//
	// y_k^T B_k^-1 y_k is a scalar, and the third term is a rank-two update
	// where B_k^-1 y_k is one vector and s_k is the other. Compute the update
	// values then actually perform the rank updates.
	yBy := mat64.Inner(b.yVec, b.invHess, b.yVec)
	firstTermConst := (sDotY + yBy) / (sDotYSquared)
	b.tmpVec.MulVec(b.invHess, b.yVec)

	b.invHess.RankTwo(b.invHess, -1/sDotY, b.tmpVec, b.sVec)
	b.invHess.SymRankOne(b.invHess, firstTermConst, b.sVec)

	// update the bfgs stored data to the new iteration
	copy(b.x, loc.X)
	copy(b.grad, loc.Gradient)

	// Compute the new search direction
	d := mat64.NewVector(b.dim, dir)
	g := mat64.NewVector(b.dim, loc.Gradient)

	d.MulVec(b.invHess, g) // new direction stored in place
	floats.Scale(-1, dir)
	return 1
}
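Written out, the rank-one plus rank-two update applied above to the inverse Hessian is

	H_{k+1} = H_k + \frac{s^\top y + y^\top H_k y}{(s^\top y)^2}\, s s^\top - \frac{1}{s^\top y}\left(H_k y\, s^\top + s\, y^\top H_k\right),

which matches the expanded form of the standard BFGS inverse update (cf. Nocedal, Wright, Eq. 6.17).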
Example #26
func (g *GradOptimizable) Init() error {
	if g.Losser == nil {
		g.Losser = loss.SquaredDistance{}
	}
	if g.Regularizer == nil {
		g.Regularizer = regularize.None{}
	}
	if g.Sampler == nil {
		g.Sampler = &Batch{}
	}

	if g.Inputs == nil {
		return errors.New("No input data")
	}

	nSamples, _ := g.Inputs.Dims()
	if nSamples == 0 {
		return errors.New("No input data")
	}
	if g.NumWorkers == 0 {
		g.NumWorkers = 1
	}

	outputSamples, outputDim := g.Outputs.Dims()
	if outputSamples != nSamples {
		return errors.New("gradoptimize: input and output row mismatch")
	}

	nParameters := g.Trainable.NumParameters()

	batches := make([]batchSend, g.NumWorkers+1) // +1 is for regularizer
	for i := range batches {
		batches[i].deriv = make([]float64, nParameters)
	}
	g.batches = batches

	g.grainSize = g.Trainable.GrainSize()

	g.Sampler.Init(nSamples)

	g.features = FeaturizeTrainable(g.Trainable, g.Inputs, nil)

	work := make(chan batchSend, g.NumWorkers)
	done := make(chan batchSend, g.NumWorkers)
	regularizeChan := make(chan batchSend, 1)
	regDone := make(chan batchSend, 1)
	quit := make(chan struct{})

	g.sendWork = work
	g.receiveWork = done
	g.quit = quit
	g.regularizeChan = regularizeChan
	g.regDone = regDone

	// launch workers
	for worker := 0; worker < g.NumWorkers; worker++ {
		go func(outputDim, nParameters int) {
			lossDeriver := g.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			for {
				select {
				case w := <-work:
					// Zero out existing derivative
					w.loss = 0
					for i := range w.deriv {
						w.deriv[i] = 0
					}
					for _, idx := range w.idxs {
						lossDeriver.Predict(w.parameters, g.features.RawRowView(idx), predOutput)
						g.Outputs.Row(outputs, idx)
						loss := g.Losser.LossDeriv(predOutput, outputs, dLossDPred)
						if g.Weights == nil {
							w.loss += loss
						} else {
							w.loss += g.Weights[idx] * loss
						}
						lossDeriver.Deriv(w.parameters, g.features.RawRowView(idx), predOutput, dLossDPred, dLossDParam)
						if g.Weights != nil {
							floats.Scale(g.Weights[idx], dLossDParam)
						}
						floats.Add(w.deriv, dLossDParam)
					}
					// Send the result back
					done <- w
				case <-quit:
					return
				}
			}
		}(outputDim, nParameters)
	}

	// launch regularizer
	go func() {
		for {
			select {
			case w := <-regularizeChan:
				loss := g.Regularizer.LossDeriv(w.parameters, w.deriv)
				w.loss = loss
				regDone <- w
			case <-quit:
				return
			}
		}
	}()
	return nil
}
Example #27
func testDtrevc3(t *testing.T, impl Dtrevc3er, side lapack.EigVecSide, howmny lapack.HowMany, tmat blas64.General, optwork bool, rnd *rand.Rand) {
	const tol = 1e-14

	n := tmat.Rows
	extra := tmat.Stride - tmat.Cols
	right := side != lapack.LeftEigVec
	left := side != lapack.RightEigVec

	var selected, selectedWant []bool
	var mWant int // How many columns the eigenvectors will occupy.
	if howmny == lapack.SelectedEigVec {
		selected = make([]bool, n)
		selectedWant = make([]bool, n)
		// Dtrevc3 will compute only selected eigenvectors. Pick them
		// randomly disregarding whether they are real or complex.
		for i := range selected {
			if rnd.Float64() < 0.5 {
				selected[i] = true
			}
		}
		// Dtrevc3 will modify (standardize) the slice selected based on
		// whether the corresponding eigenvalues are real or complex. Do
		// the same process here to fill selectedWant.
		for i := 0; i < n; {
			if i == n-1 || tmat.Data[(i+1)*tmat.Stride+i] == 0 {
				// Real eigenvalue.
				if selected[i] {
					selectedWant[i] = true
					mWant++ // Real eigenvectors occupy one column.
				}
				i++
			} else {
				// Complex eigenvalue.
				if selected[i] || selected[i+1] {
					// Dtrevc3 will modify selected so that
					// only the first element of the pair is
					// true.
					selectedWant[i] = true
					mWant += 2 // Complex eigenvectors occupy two columns.
				}
				i += 2
			}
		}
	} else {
		// All eigenvectors occupy n columns.
		mWant = n
	}

	var vr blas64.General
	if right {
		if howmny == lapack.AllEigVecMulQ {
			vr = eye(n, n+extra)
		} else {
			// VR will be overwritten.
			vr = nanGeneral(n, mWant, n+extra)
		}
	}

	var vl blas64.General
	if left {
		if howmny == lapack.AllEigVecMulQ {
			vl = eye(n, n+extra)
		} else {
			// VL will be overwritten.
			vl = nanGeneral(n, mWant, n+extra)
		}
	}

	work := make([]float64, max(1, 3*n))
	if optwork {
		impl.Dtrevc3(side, howmny, nil, n, nil, 1, nil, 1, nil, 1, mWant, work, -1)
		work = make([]float64, int(work[0]))
	}

	m := impl.Dtrevc3(side, howmny, selected, n, tmat.Data, tmat.Stride,
		vl.Data, vl.Stride, vr.Data, vr.Stride, mWant, work, len(work))

	prefix := fmt.Sprintf("Case side=%v, howmny=%v, n=%v, extra=%v, optwk=%v",
		side, howmny, n, extra, optwork)

	if !generalOutsideAllNaN(tmat) {
		t.Errorf("%v: out-of-range write to T", prefix)
	}
	if !generalOutsideAllNaN(vl) {
		t.Errorf("%v: out-of-range write to VL", prefix)
	}
	if !generalOutsideAllNaN(vr) {
		t.Errorf("%v: out-of-range write to VR", prefix)
	}

	if m != mWant {
		t.Errorf("%v: unexpected value of m. Want %v, got %v", prefix, mWant, m)
	}

	if howmny == lapack.SelectedEigVec {
		for i := range selected {
			if selected[i] != selectedWant[i] {
				t.Errorf("%v: unexpected selected[%v]", prefix, i)
			}
		}
	}

	// Check that the columns of VR and VL are actually eigenvectors and
	// that the magnitude of their largest element is 1.
	var k int
	for j := 0; j < n; {
		re := tmat.Data[j*tmat.Stride+j]
		if j == n-1 || tmat.Data[(j+1)*tmat.Stride+j] == 0 {
			if howmny == lapack.SelectedEigVec && !selected[j] {
				j++
				continue
			}
			if right {
				ev := columnOf(vr, k)
				norm := floats.Norm(ev, math.Inf(1))
				if math.Abs(norm-1) > tol {
					t.Errorf("%v: magnitude of largest element of VR[:,%v] not 1", prefix, k)
				}
				if !isRightEigenvectorOf(tmat, ev, nil, complex(re, 0), tol) {
					t.Errorf("%v: VR[:,%v] is not real right eigenvector", prefix, k)
				}
			}
			if left {
				ev := columnOf(vl, k)
				norm := floats.Norm(ev, math.Inf(1))
				if math.Abs(norm-1) > tol {
					t.Errorf("%v: magnitude of largest element of VL[:,%v] not 1", prefix, k)
				}
				if !isLeftEigenvectorOf(tmat, ev, nil, complex(re, 0), tol) {
					t.Errorf("%v: VL[:,%v] is not real left eigenvector", prefix, k)
				}
			}
			k++
			j++
			continue
		}
		if howmny == lapack.SelectedEigVec && !selected[j] {
			j += 2
			continue
		}
		im := math.Sqrt(math.Abs(tmat.Data[(j+1)*tmat.Stride+j])) *
			math.Sqrt(math.Abs(tmat.Data[j*tmat.Stride+j+1]))
		if right {
			evre := columnOf(vr, k)
			evim := columnOf(vr, k+1)
			var evmax float64
			for i, v := range evre {
				evmax = math.Max(evmax, math.Abs(v)+math.Abs(evim[i]))
			}
			if math.Abs(evmax-1) > tol {
				t.Errorf("%v: magnitude of largest element of VR[:,%v] not 1", prefix, k)
			}
			if !isRightEigenvectorOf(tmat, evre, evim, complex(re, im), tol) {
				t.Errorf("%v: VR[:,%v:%v] is not complex right eigenvector", prefix, k, k+1)
			}
			floats.Scale(-1, evim)
			if !isRightEigenvectorOf(tmat, evre, evim, complex(re, -im), tol) {
				t.Errorf("%v: VR[:,%v:%v] is not complex right eigenvector", prefix, k, k+1)
			}
		}
		if left {
			evre := columnOf(vl, k)
			evim := columnOf(vl, k+1)
			var evmax float64
			for i, v := range evre {
				evmax = math.Max(evmax, math.Abs(v)+math.Abs(evim[i]))
			}
			if math.Abs(evmax-1) > tol {
				t.Errorf("%v: magnitude of largest element of VL[:,%v] not 1", prefix, k)
			}
			if !isLeftEigenvectorOf(tmat, evre, evim, complex(re, im), tol) {
				t.Errorf("%v: VL[:,%v:%v] is not complex left eigenvector", prefix, k, k+1)
			}
			floats.Scale(-1, evim)
			if !isLeftEigenvectorOf(tmat, evre, evim, complex(re, -im), tol) {
				t.Errorf("%v: VL[:,%v:%v] is not complex left eigenvector", prefix, k, k+1)
			}
		}
		k += 2
		j += 2
	}
}
Example #28
// Convert converts a General-form LP into a standard form LP.
// The general form of an LP is:
//  minimize c^T * x
//  s.t      G * x <= h
//           A * x = b
// And the standard form is:
//  minimize cNew^T * x
//  s.t      aNew * x = bNew
//           x >= 0
// If there are no constraints of the given type, the inputs may be nil.
func Convert(c []float64, g mat64.Matrix, h []float64, a mat64.Matrix, b []float64) (cNew []float64, aNew *mat64.Dense, bNew []float64) {
	nVar := len(c)
	nIneq := len(h)

	// Check input sizes.
	if g == nil {
		if nIneq != 0 {
			panic(badShape)
		}
	} else {
		gr, gc := g.Dims()
		if gr != nIneq {
			panic(badShape)
		}
		if gc != nVar {
			panic(badShape)
		}
	}

	nEq := len(b)
	if a == nil {
		if nEq != 0 {
			panic(badShape)
		}
	} else {
		ar, ac := a.Dims()
		if ar != nEq {
			panic(badShape)
		}
		if ac != nVar {
			panic(badShape)
		}
	}

	// Convert the general form LP.
	// Derivation:
	// 0. Start with general form
	//  min.	c^T * x
	//  s.t.	G * x <= h
	//  		A * x = b
	// 1. Introduce slack variables for each constraint
	//  min. 	c^T * x
	//  s.t.	G * x + s = h
	//			A * x = b
	//      	s >= 0
	// 2. Add non-negativity constraints for x by splitting x
	// into positive and negative components.
	//   x = xp - xn
	//   xp >= 0, xn >= 0
	// This makes the LP
	//  min.	c^T * xp - c^T xn
	//  s.t. 	G * xp - G * xn + s = h
	//			A * xp  - A * xn = b
	//			xp >= 0, xn >= 0, s >= 0
	// 3. Write the above in standard form:
	//  xt = [xp
	//	 	  xn
	//		  s ]
	//  min.	[c^T, -c^T, 0] xt
	//  s.t.	[G, -G, I] xt = h
	//   		[A, -A, 0] xt = b
	//			xt >= 0

	// In summary:
	// Original LP:
	//  min.	c^T * x
	//  s.t.	G * x <= h
	//  		A * x = b
	// Standard Form:
	//  xt = [xp; xn; s]
	//  min.	[c^T, -c^T, 0] xt
	//  s.t.	[G, -G, I] xt = h
	//   		[A, -A, 0] xt = b
	//			xt >= 0

	// New size of x is [xp, xn, s]
	nNewVar := nVar + nVar + nIneq

	// Construct cNew = [c; -c; 0]
	cNew = make([]float64, nNewVar)
	copy(cNew, c)
	copy(cNew[nVar:], c)
	floats.Scale(-1, cNew[nVar:2*nVar])

	// New number of equality constraints is the number of total constraints.
	nNewEq := nIneq + nEq

	// Construct bNew = [h, b].
	bNew = make([]float64, nNewEq)
	copy(bNew, h)
	copy(bNew[nIneq:], b)

	// Construct aNew = [G, -G, I; A, -A, 0].
	aNew = mat64.NewDense(nNewEq, nNewVar, nil)
	if nIneq != 0 {
		aView := (aNew.View(0, 0, nIneq, nVar)).(*mat64.Dense)
		aView.Copy(g)
		aView = (aNew.View(0, nVar, nIneq, nVar)).(*mat64.Dense)
		aView.Scale(-1, g)
		aView = (aNew.View(0, 2*nVar, nIneq, nIneq)).(*mat64.Dense)
		for i := 0; i < nIneq; i++ {
			aView.Set(i, i, 1)
		}
	}
	if nEq != 0 {
		aView := (aNew.View(nIneq, 0, nEq, nVar)).(*mat64.Dense)
		aView.Copy(a)
		aView = (aNew.View(nIneq, nVar, nEq, nVar)).(*mat64.Dense)
		aView.Scale(-1, a)
	}
	return cNew, aNew, bNew
}
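A minimal usage sketch, assuming mat64 and fmt are imported into the same package; the tiny LP is purely illustrative.

func ExampleConvert() {
	// minimize  -x1 - 2*x2
	// s.t.      x1 + x2 <= 4
	//           x1       = 1
	c := []float64{-1, -2}
	g := mat64.NewDense(1, 2, []float64{1, 1})
	h := []float64{4}
	a := mat64.NewDense(1, 2, []float64{1, 0})
	b := []float64{1}

	cNew, aNew, bNew := Convert(c, g, h, a, b)
	// cNew = [c, -c, 0] has length 2+2+1, aNew is 2×5, bNew = [h, b].
	r, cols := aNew.Dims()
	fmt.Println(len(cNew), r, cols, len(bNew))
	// Output: 5 2 5 2
}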
Example #29
func (g *GradientDescent) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return g.StepSizer.StepSize(loc, dir)
}
Example #30
// normalSlice normalizes the vector of values, summing them and dividing each by the total.
func normalSlice(v []float64) {
	tot := floats.Sum(v)
	floats.Scale(1.0/tot, v)
}