Example #1
// Not callable in parallel because of the batches
func (g *GradOptimizable) FuncGrad(params []float64, deriv []float64) float64 {
	inds := g.Sampler.Iterate()
	total := len(inds)

	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}

	// Send the regularizer
	g.batches[0].parameters = params
	g.regularizeChan <- g.batches[0]

	// Send initial batches out
	var initBatches int
	var lastSent int
	for i := 0; i < g.NumWorkers; i++ {
		if lastSent == total {
			break
		}
		add := g.grainSize
		if lastSent+add >= total {
			add = total - lastSent
		}
		initBatches++
		g.batches[i+1].idxs = inds[lastSent : lastSent+add]
		g.batches[i+1].parameters = params
		g.sendWork <- g.batches[i+1]
		lastSent += add
	}

	// Collect the batches and resend out
	for lastSent < total {
		batch := <-g.receiveWork
		totalLoss += batch.loss
		floats.Add(deriv, batch.deriv)
		add := g.grainSize
		if lastSent+add >= total {
			add = total - lastSent
		}
		batch.idxs = inds[lastSent : lastSent+add]
		g.sendWork <- batch
		lastSent += add
	}

	// All inds sent, so just wait for the remaining results to be collected
	for i := 0; i < initBatches; i++ {
		batch := <-g.receiveWork
		totalLoss += batch.loss
		floats.Add(deriv, batch.deriv)
	}
	batch := <-g.regDone
	totalLoss += batch.loss
	floats.Add(deriv, batch.deriv)

	totalLoss /= float64(len(inds))
	floats.Scale(1/float64(len(inds)), deriv)
	return totalLoss
}
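The pattern in FuncGrad above, accumulating per-batch partial derivatives into deriv with floats.Add and then normalizing with floats.Scale, recurs throughout these examples. A minimal, self-contained sketch of just that pattern (the slices and sample count below are made up for illustration; the import path assumes current gonum, while the code above predates it and would use github.com/gonum/floats):

package main

import (
	"fmt"

	"gonum.org/v1/gonum/floats"
)

func main() {
	// Partial gradients, e.g. one per worker or per batch.
	partials := [][]float64{
		{1, 2, 3},
		{3, 2, 1},
	}
	nSamples := 4.0

	deriv := make([]float64, 3)
	for _, p := range partials {
		// floats.Add adds p into deriv element-wise; it panics on length mismatch.
		floats.Add(deriv, p)
	}
	// Normalize by the number of samples, as FuncGrad does with len(inds).
	floats.Scale(1/nSamples, deriv)
	fmt.Println(deriv) // [1 1 1]
}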
Example #2
// UpdateOne updates sufficient statistics using one observation.
func (g *Model) UpdateOne(o model.Obs, w float64) {

	glog.V(6).Infof("gaussian update, name:%s, obs:%v, weight:%e", g.ModelName, o, w)

	/* Update sufficient statistics. */
	obs, _, _ := model.ObsToF64(o)
	floatx.Apply(floatx.ScaleFunc(w), obs, g.tmpArray)
	floats.Add(g.Sumx, g.tmpArray)
	floatx.Sq(g.tmpArray, obs)
	floats.Scale(w, g.tmpArray)
	floats.Add(g.Sumxsq, g.tmpArray)
	g.NSamples += w
}
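For context, these are the standard weighted sufficient statistics of an (apparently diagonal) Gaussian, accumulated per dimension d. A sketch using the field names above; the mean and variance recovery is the usual formula and is not part of this snippet:

\[
\text{Sumx}_d \mathrel{+}= w\,x_d, \qquad
\text{Sumxsq}_d \mathrel{+}= w\,x_d^2, \qquad
N \mathrel{+}= w,
\]

from which an estimation step would recover \(\hat\mu_d = \text{Sumx}_d / N\) and \(\hat\sigma_d^2 = \text{Sumxsq}_d / N - \hat\mu_d^2\).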
Example #3
func TestJensenShannon(t *testing.T) {
	for i, test := range []struct {
		p []float64
		q []float64
	}{
		{
			p: []float64{0.5, 0.1, 0.3, 0.1},
			q: []float64{0.1, 0.4, 0.25, 0.25},
		},
		{
			p: []float64{0.4, 0.6, 0.0},
			q: []float64{0.2, 0.2, 0.6},
		},
		{
			p: []float64{0.1, 0.1, 0.0, 0.8},
			q: []float64{0.6, 0.3, 0.0, 0.1},
		},
		{
			p: []float64{0.5, 0.1, 0.3, 0.1},
			q: []float64{0.5, 0, 0.25, 0.25},
		},
		{
			p: []float64{0.5, 0.1, 0, 0.4},
			q: []float64{0.1, 0.4, 0.25, 0.25},
		},
	} {

		m := make([]float64, len(test.p))
		p := test.p
		q := test.q
		floats.Add(m, p)
		floats.Add(m, q)
		floats.Scale(0.5, m)

		js1 := 0.5*KullbackLeibler(p, m) + 0.5*KullbackLeibler(q, m)
		js2 := JensenShannon(p, q)

		if math.IsNaN(js2) {
			t.Errorf("In case %v, JS distance is NaN", i)
		}

		if math.Abs(js1-js2) > 1e-14 {
			t.Errorf("JS mismatch case %v. Expected %v, found %v.", i, js1, js2)
		}
	}
	if !Panics(func() { JensenShannon(make([]float64, 3), make([]float64, 2)) }) {
		t.Errorf("JensenShannon did not panic with p, q length mismatch")
	}
}
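The identity the test checks is the definition of the Jensen–Shannon divergence in terms of the Kullback–Leibler divergence, with the mixture built by the floats.Add and floats.Scale calls above:

\[
\mathrm{JS}(p, q) = \tfrac{1}{2}\,\mathrm{KL}(p \,\|\, m) + \tfrac{1}{2}\,\mathrm{KL}(q \,\|\, m),
\qquad m = \tfrac{1}{2}(p + q).
\]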
Example #4
// transformNormal performs the same operation as TransformNormal except no
// safety checks are performed and both input slices must be non-nil.
func (n *Normal) transformNormal(dst, normal []float64) []float64 {
	srcVec := mat64.NewVector(n.dim, normal)
	dstVec := mat64.NewVector(n.dim, dst)
	dstVec.MulVec(&n.lower, srcVec)
	floats.Add(dst, n.mu)
	return dst
}
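Both transformNormal and Normal.Rand in Example #8 rely on the standard affine transform of a standard normal vector: if \(z \sim \mathcal{N}(0, I)\) and \(\Sigma = L L^{T}\) is a Cholesky factorization of the covariance, then

\[
x = \mu + L z \sim \mathcal{N}(\mu, \Sigma),
\]

so the MulVec call applies the factor (n.lower here) and floats.Add shifts the result by the mean n.mu.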
Example #5
// returnNext updates the location based on the iteration type and the current
// simplex, and returns the next operation.
func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) {
	n.lastIter = iter
	switch iter {
	case nmMajor:
		// Fill loc with the current best point and value,
		// and command a convergence check.
		copy(loc.X, n.vertices[0])
		loc.F = n.values[0]
		return MajorIteration, nil
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		dim := len(loc.X)
		floats.SubTo(loc.X, n.centroid, n.vertices[dim])
		floats.Scale(scale, loc.X)
		floats.Add(loc.X, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, loc.X)
		}
		return FuncEvaluation, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, loc.X)
		floats.Add(loc.X, n.vertices[0])
		return FuncEvaluation, nil
	default:
		panic("unreachable")
	}
}
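In the non-shrink cases the new point is built from the centroid \(x_c\) of all vertices except the worst one \(x_{worst}\) (assuming, as the indexing n.vertices[dim] suggests, that the vertices are kept sorted from best to worst):

\[
x_{new} = x_c + s\,(x_c - x_{worst}), \qquad
s \in \{\rho,\ \rho\chi,\ \rho\gamma,\ -\gamma\},
\]

for reflection, expansion, outside contraction, and inside contraction respectively, where \(\rho\), \(\chi\), \(\gamma\) denote n.reflection, n.expansion, and n.contraction. The SubTo, Scale, Add sequence computes exactly this.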
Example #6
// ObjGrad computes the objective value and stores the derivative in place
func (g *BatchGradBased) ObjGrad(parameters []float64, derivative []float64) (loss float64) {
	c := make(chan lossDerivStruct, 10)

	// Set the channel for parallel for
	f := func(start, end int) {
		g.lossDerivFunc(start, end, c, parameters)
	}

	go func() {
		wg := &sync.WaitGroup{}
		// Compute the losses and the derivatives all in parallel
		wg.Add(2)
		go func() {
			common.ParallelFor(g.nTrain, g.grainSize, f)
			wg.Done()
		}()
		// Compute the regularization
		go func() {
			deriv := make([]float64, g.nParameters)
			loss := g.regularizer.LossDeriv(parameters, deriv)
			//fmt.Println("regularizer loss = ", loss)
			//fmt.Println("regularizer deriv = ", deriv)
			c <- lossDerivStruct{
				loss:  loss,
				deriv: deriv,
			}
			wg.Done()
		}()
		// Wait for all of the results to be sent on the channel
		wg.Wait()
		// Close the channel
		close(c)
	}()
	// zero the derivative
	for i := range derivative {
		derivative[i] = 0
	}

	// Range over the channel, incrementing the loss and derivative
	// as they come in
	for l := range c {
		loss += l.loss
		floats.Add(derivative, l.deriv)
	}
	//fmt.Println("nTrain", g.nTrain)
	//fmt.Println("final deriv", derivative)
	// Normalize by the number of training samples
	loss /= float64(g.nTrain)
	floats.Scale(1/float64(g.nTrain), derivative)

	return loss
}
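The concurrency shape here, fan the work out to goroutines, have each one send a lossDerivStruct on a shared channel, close the channel once a WaitGroup drains, and accumulate on the receiving side with floats.Add, reduces to a short sketch (the partial type and the toy payloads below are stand-ins, not part of the library):

package main

import (
	"fmt"
	"sync"

	"gonum.org/v1/gonum/floats"
)

type partial struct {
	loss  float64
	deriv []float64
}

func main() {
	c := make(chan partial, 4)
	var wg sync.WaitGroup

	// Fan out: each worker computes a partial loss and derivative.
	for w := 0; w < 3; w++ {
		wg.Add(1)
		go func(w int) {
			defer wg.Done()
			c <- partial{loss: float64(w), deriv: []float64{float64(w), 1}}
		}(w)
	}

	// Close the channel once every worker has sent its result,
	// so the range below terminates.
	go func() {
		wg.Wait()
		close(c)
	}()

	// Fan in: accumulate losses and derivatives as they arrive.
	var loss float64
	deriv := make([]float64, 2)
	for p := range c {
		loss += p.loss
		floats.Add(deriv, p.deriv)
	}
	fmt.Println(loss, deriv) // 3 [3 3]
}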
Example #7
// returnNext finds the next location to evaluate, stores the location in xNext,
// and returns the evaluation and iteration types.
func (n *NelderMead) returnNext(iter nmIterType, xNext []float64) (EvaluationType, IterationType, error) {
	dim := len(xNext)
	n.lastIter = iter
	switch iter {
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		floats.SubTo(xNext, n.centroid, n.vertices[dim])
		floats.Scale(scale, xNext)
		floats.Add(xNext, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, xNext)
			// Nelder Mead iterations start with Reflection step
			return FuncEvaluation, MajorIteration, nil
		}
		return FuncEvaluation, MinorIteration, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(xNext, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, xNext)
		floats.Add(xNext, n.vertices[0])
		return FuncEvaluation, SubIteration, nil
	default:
		panic("unreachable")
	}
}
Example #8
// Rand generates a random number according to the distribution.
// If the input slice is nil, new memory is allocated, otherwise the result is stored
// in place.
func (n *Normal) Rand(x []float64) []float64 {
	x = reuseAs(x, n.dim)
	tmp := make([]float64, n.dim)
	if n.src == nil {
		for i := range x {
			tmp[i] = rand.NormFloat64()
		}
	} else {
		for i := range x {
			tmp[i] = n.src.NormFloat64()
		}
	}
	tmpVec := mat64.NewVector(n.dim, tmp)
	xVec := mat64.NewVector(n.dim, x)
	xVec.MulVec(n.chol, true, tmpVec)
	floats.Add(x, n.mu)
	return x
}
Example #9
// UpdateOne updates the model's sufficient statistics using one observation.
func (gmm *Model) UpdateOne(o model.Obs, w float64) {

	obs, _, _ := model.ObsToF64(o)
	maxProb := gmm.logProbInternal(obs, gmm.tmpProbs)
	gmm.Likelihood += maxProb
	floatx.Apply(floatx.AddScalarFunc(-maxProb+math.Log(w)), gmm.tmpProbs, nil)

	// Compute posterior probabilities.
	floatx.Exp(gmm.tmpProbs, gmm.tmpProbs)

	// Update posterior sum, needed to compute mixture weights.
	floats.Add(gmm.PosteriorSum, gmm.tmpProbs)

	// Update Gaussian components.
	for i, c := range gmm.Components {
		c.UpdateOne(o, gmm.tmpProbs[i])
	}

	// Count number of observations.
	gmm.NSamples += w
}
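Reading the log-space arithmetic: gmm.tmpProbs appears to start as the per-component log joint probabilities and maxProb as the observation's total log probability under the mixture, so after subtracting maxProb, adding log(w), and exponentiating, tmpProbs[i] is w times the usual responsibility

\[
\gamma_i = \frac{\pi_i\, p_i(x)}{\sum_j \pi_j\, p_j(x)},
\]

which is what gets accumulated into PosteriorSum and passed as the weight to each component's UpdateOne.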
Example #10
// NewBatchGradBased creates a new BatchGradBased from the given inputs.
func NewBatchGradBased(trainable Trainable, cacheFeatures bool, inputs, outputs common.RowMatrix, weights []float64, losser loss.DerivLosser, regularizer regularize.Regularizer) *BatchGradBased {
	var features *mat64.Dense
	if cacheFeatures {
		features = FeaturizeTrainable(trainable, inputs, nil)
	}

	// TODO: Add in error checking

	if losser == nil {
		losser = loss.SquaredDistance{}
	}
	if regularizer == nil {
		regularizer = regularize.None{}
	}

	if weights != nil {
		// TODO: Fix weights
		panic("non-nil weights")
	}

	nTrain, outputDim := outputs.Dims()
	_, inputDim := inputs.Dims()
	g := &BatchGradBased{
		t:           trainable,
		inputs:      inputs,
		outputs:     outputs,
		features:    features,
		losser:      losser,
		regularizer: regularizer,
		nTrain:      nTrain,
		outputDim:   outputDim,
		inputDim:    inputDim,
		nParameters: trainable.NumParameters(),
		grainSize:   trainable.GrainSize(),
	}

	// TODO: Add in row viewer stuff
	// TODO: Create a different function for computing just the loss
	//inputRowViewer, ok := inputs.(mat64.RowViewer)
	//outputRowViewer, ok := outputs.(mat64.RowViewer)

	// TODO: Move this to its own function
	var f func(start, end int, c chan lossDerivStruct, parameters []float64)

	switch {
	default:
		panic("Shouldn't be here")
	case cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)
			for i := start; i < end; i++ {
				// Compute the prediction
				lossDeriver.Predict(parameters, g.features.RawRowView(i), prediction)
				// Compute the loss

				g.outputs.Row(output, i)
				loss += g.losser.LossDeriv(prediction, output, dLossDPred)
				// Compute the derivative
				lossDeriver.Deriv(parameters, g.features.RawRowView(i), prediction, dLossDPred, dLossDWeight)

				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the value back on the channel
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	case !cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)

			input := make([]float64, g.inputDim)

			features := make([]float64, g.t.NumFeatures())

			featurizer := g.t.NewFeaturizer()
			for i := start; i < end; i++ {

				g.inputs.Row(input, i)

				featurizer.Featurize(input, features)

				// Compute the prediction
				lossDeriver.Predict(parameters, features, prediction)
				// Compute the loss
				g.outputs.Row(output, i)

				loss += g.losser.LossDeriv(prediction, output, dLossDPred)

				// Compute the derivative
				lossDeriver.Deriv(parameters, features, prediction, dLossDPred, dLossDWeight)

				// Add to the total derivative
				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the accumulated value back on the channel once the loop
			// is done, mirroring the cached-features branch above.
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	}

	g.lossDerivFunc = f

	return g
}
Example #11
func (lbfgs *Lbfgs) Iterate(loc *multi.Location, obj *uni.Objective, grad *multi.Gradient, fun optimize.MultiObjGrad) (status.Status, error) {
	counter := lbfgs.counter
	q := lbfgs.q
	a := lbfgs.a
	b := lbfgs.b
	rhoHist := lbfgs.rhoHist
	sHist := lbfgs.sHist
	yHist := lbfgs.yHist
	gamma_k := lbfgs.gamma_k
	tmp := lbfgs.tmp
	p_k := lbfgs.p_k
	s_k := lbfgs.s_k
	y_k := lbfgs.y_k
	z := lbfgs.z

	// Calculate search direction
	for i, val := range grad.Curr() {
		q[i] = val
	}
	for i := counter - 1; i >= 0; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		floats.Sub(q, tmp)
	}
	for i := lbfgs.NumStore - 1; i >= counter; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		//fmt.Println(q)
		//fmt.Println(tmp)
		floats.Sub(q, tmp)
	}

	// Assume H_0 is the identity times gamma_k
	copy(z, q)
	floats.Scale(gamma_k, z)
	// Second loop for update, going oldest to newest
	for i := counter; i < lbfgs.NumStore; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}
	for i := 0; i < counter; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}

	lbfgs.a = a
	lbfgs.b = b

	copy(p_k, z)
	floats.Scale(-1, p_k)
	normP_k := floats.Norm(p_k, 2)

	// Perform line search -- need to find some way to implement this, especially bookkeeping function values
	linesearchResult, err := linesearch.Linesearch(fun, lbfgs.LinesearchMethod, lbfgs.LinesearchSettings, lbfgs.Wolfe, p_k, loc.Curr(), obj.Curr(), grad.Curr())

	// In the future add a check to switch to a different linesearcher?
	if err != nil {
		return status.LinesearchFailure, err
	}
	x_kp1 := linesearchResult.Loc
	f_kp1 := linesearchResult.Obj
	g_kp1 := linesearchResult.Grad
	alpha_k := linesearchResult.Step

	// Update hessian estimate
	copy(s_k, p_k)
	floats.Scale(alpha_k, s_k)

	copy(y_k, g_kp1)
	floats.Sub(y_k, grad.Curr())
	skDotYk := floats.Dot(s_k, y_k)

	// Bookkeep the results
	stepSize := alpha_k * normP_k
	lbfgs.step.AddToHist(stepSize)
	lbfgs.step.SetCurr(stepSize)
	loc.SetCurr(x_kp1)
	//lbfgs.loc.AddToHist(x_kp1)

	//fmt.Println(lbfgs.loc.GetHist())
	obj.SetCurr(f_kp1)
	grad.SetCurr(g_kp1)

	copy(sHist[counter], s_k)
	copy(yHist[counter], y_k)
	rhoHist[counter] = 1 / skDotYk

	lbfgs.gamma_k = skDotYk / floats.Dot(y_k, y_k)

	lbfgs.counter += 1
	if lbfgs.counter == lbfgs.NumStore {
		lbfgs.counter = 0
	}
	return status.Continue, nil
}
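The two passes over the history arrays are the standard L-BFGS two-loop recursion for the search direction; the circular buffer indexed by counter is why each pass is split into two loops. With \(q \leftarrow \nabla f_k\):

\[
\alpha_i = \rho_i\, s_i^{T} q, \qquad q \leftarrow q - \alpha_i y_i \qquad (i = k-1, \dots, k-m),
\]
\[
r \leftarrow \gamma_k q, \qquad \beta_i = \rho_i\, y_i^{T} r, \qquad r \leftarrow r + (\alpha_i - \beta_i)\, s_i \qquad (i = k-m, \dots, k-1),
\]

and the search direction is \(p_k = -r\), with \(\rho_i = 1 / (s_i^{T} y_i)\) and \(\gamma_k = s_{k-1}^{T} y_{k-1} / (y_{k-1}^{T} y_{k-1})\), matching the rhoHist and gamma_k updates at the end of the method.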
Example #12
func (b *BatchGradient) funcGrad(params, deriv []float64) float64 {
	nParameters := len(deriv)

	// Send out all of the work
	done := make(chan result)
	sz := b.nSamples / b.Workers
	sent := 0
	for i := 0; i < b.Workers; i++ {
		outputDim := b.outputDim
		last := sent + sz
		if i == b.Workers-1 {
			last = b.nSamples
		}
		go func(sent, last int) {
			lossDeriver := b.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			tmpderiv := make([]float64, nParameters)
			var totalLoss float64
			for i := sent; i < last; i++ {
				lossDeriver.Predict(params, b.features.RawRowView(i), predOutput)
				b.Outputs.Row(outputs, i)
				loss := b.Losser.LossDeriv(predOutput, outputs, dLossDPred)
				if b.Weights == nil {
					totalLoss += loss
				} else {
					totalLoss += b.Weights[i] * loss
				}
				lossDeriver.Deriv(params, b.features.RawRowView(i), predOutput, dLossDPred, dLossDParam)
				if b.Weights != nil {
					floats.Scale(b.Weights[i], dLossDParam)
				}
				floats.Add(tmpderiv, dLossDParam)
			}
			done <- result{totalLoss, tmpderiv}
		}(sent, last)
		sent += sz
	}
	// Collect all the results
	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}
	for i := 0; i < b.Workers; i++ {
		w := <-done
		totalLoss += w.loss
		floats.Add(deriv, w.deriv)
	}
	// Compute the regularizer
	if b.Regularizer != nil {
		tmp := make([]float64, nParameters)
		totalLoss += b.Regularizer.LossDeriv(params, tmp)
		floats.Add(deriv, tmp)
	}
	sumWeights := float64(b.nSamples)
	if b.Weights != nil {
		sumWeights = floats.Sum(b.Weights)
	}
	totalLoss /= sumWeights
	floats.Scale(1/sumWeights, deriv)
	return totalLoss
}
Example #13
func (g *GradOptimizable) Init() error {
	if g.Losser == nil {
		g.Losser = loss.SquaredDistance{}
	}
	if g.Regularizer == nil {
		g.Regularizer = regularize.None{}
	}
	if g.Sampler == nil {
		g.Sampler = &Batch{}
	}

	if g.Inputs == nil {
		return errors.New("No input data")
	}

	nSamples, _ := g.Inputs.Dims()
	if nSamples == 0 {
		return errors.New("No input data")
	}
	if g.NumWorkers == 0 {
		g.NumWorkers = 1
	}

	outputSamples, outputDim := g.Outputs.Dims()
	if outputSamples != nSamples {
		return errors.New("gradoptimize: input and output row mismatch")
	}

	nParameters := g.Trainable.NumParameters()

	batches := make([]batchSend, g.NumWorkers+1) // +1 is for regularizer
	for i := range batches {
		batches[i].deriv = make([]float64, nParameters)
	}
	g.batches = batches

	g.grainSize = g.Trainable.GrainSize()

	g.Sampler.Init(nSamples)

	g.features = FeaturizeTrainable(g.Trainable, g.Inputs, nil)

	work := make(chan batchSend, g.NumWorkers)
	done := make(chan batchSend, g.NumWorkers)
	regularizeChan := make(chan batchSend, 1)
	regDone := make(chan batchSend, 1)
	quit := make(chan struct{})

	g.sendWork = work
	g.receiveWork = done
	g.quit = quit
	g.regularizeChan = regularizeChan
	g.regDone = regDone

	// launch workers
	for worker := 0; worker < g.NumWorkers; worker++ {
		go func(outputDim, nParameters int) {
			lossDeriver := g.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			for {
				select {
				case w := <-work:
					// Zero out existing derivative
					w.loss = 0
					for i := range w.deriv {
						w.deriv[i] = 0
					}
					for _, idx := range w.idxs {
						lossDeriver.Predict(w.parameters, g.features.RawRowView(idx), predOutput)
						g.Outputs.Row(outputs, idx)
						loss := g.Losser.LossDeriv(predOutput, outputs, dLossDPred)
						if g.Weights == nil {
							w.loss += loss
						} else {
							w.loss += g.Weights[idx] * loss
						}
						lossDeriver.Deriv(w.parameters, g.features.RawRowView(idx), predOutput, dLossDPred, dLossDParam)
						if g.Weights != nil {
							floats.Scale(g.Weights[idx], dLossDParam)
						}
						floats.Add(w.deriv, dLossDParam)
					}
					// Send the result back
					done <- w
				case <-quit:
					return
				}
			}
		}(outputDim, nParameters)
	}

	// launch regularizer
	go func() {
		for {
			select {
			case w := <-regularizeChan:
				loss := g.Regularizer.LossDeriv(w.parameters, w.deriv)
				w.loss = loss
				regDone <- w
			case <-quit:
				return
			}
		}
	}()
	return nil
}
Example #14
func (g *GP) marginalLikelihoodDerivative(x, grad []float64, trainNoise bool, mem *margLikeMemory) {
	// d/dTheta_j log[p(y|X,theta)] =
	//		1/2 * y^T * K^-1 * dK/dTheta_j * K^-1 * y - 1/2 * tr(K^-1 * dK/dTheta_j)
	//	  = 1/2 * α^T * dK/dTheta_j * α - 1/2 * tr(K^-1 * dK/dTheta_j)
	// Multiplying by -2 gives
	//		-α^T * dK/dTheta_j * α + tr(K^-1 * dK/dTheta_j)
	// The first term is an inner product.
	n := len(g.outputs)
	nHyper := g.kernel.NumHyper()
	k := mem.k
	chol := mem.chol
	alpha := mem.alpha
	dKdTheta := mem.dKdTheta
	kInvDK := mem.kInvDK

	y := mat64.NewVector(n, g.outputs)

	var noise float64
	if trainNoise {
		noise = math.Exp(x[len(x)-1])
	} else {
		noise = g.noise
	}

	// If x is the same, then reuse what has been computed in the function.
	if !floats.Equal(mem.lastX, x) {
		copy(mem.lastX, x)
		g.kernel.SetHyper(x[:nHyper])
		g.setKernelMat(k, noise)
		//chol.Cholesky(k, false)
		chol.Factorize(k)
		alpha.SolveCholeskyVec(chol, y)
	}
	g.setKernelMatDeriv(dKdTheta, trainNoise, noise)
	for i := range dKdTheta {
		kInvDK.SolveCholesky(chol, dKdTheta[i])
		inner := mat64.Inner(alpha, dKdTheta[i], alpha)
		grad[i] = -inner + mat64.Trace(kInvDK)
	}
	floats.Scale(1/float64(n), grad)

	bounds := g.kernel.Bounds()
	if trainNoise {
		bounds = append(bounds, Bound{minLogNoise, maxLogNoise})
	}
	barrierGrad := make([]float64, len(grad))
	for i, v := range x {
		// Quadratic barrier penalty.
		if v < bounds[i].Min {
			diff := bounds[i].Min - v
			barrierGrad[i] = -(barrierPow) * math.Pow(diff, barrierPow-1)
		}
		if v > bounds[i].Max {
			diff := v - bounds[i].Max
			barrierGrad[i] = (barrierPow) * math.Pow(diff, barrierPow-1)
		}
	}
	fmt.Println("noise, minNoise", x[len(x)-1], bounds[len(x)-1].Min)
	fmt.Println("barrier Grad", barrierGrad)
	floats.Add(grad, barrierGrad)
	//copy(grad, barrierGrad)
}