func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion as described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006),
	// chapter 7, page 178.
	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}
func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	// Update the direction. Uses the two-loop recursion as described in
	// Nocedal, Wright (2006), Numerical Optimization (2nd ed.), chapter 7, page 178.
	copy(dir, loc.Gradient)
	floats.SubTo(l.y, loc.Gradient, l.grad)
	floats.SubTo(l.s, loc.X, l.x)
	copy(l.sHist[l.oldest], l.s)
	copy(l.yHist[l.oldest], l.y)
	sDotY := floats.Dot(l.y, l.s)
	l.rhoHist[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)

	// Two-loop update. The first loop starts with the most recent element
	// and goes backward; the second starts with the oldest element and goes
	// forward. At the end, dir contains H^{-1} * g, so flip the direction
	// for minimization.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rhoHist[idx] * floats.Dot(l.sHist[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.yHist[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(l.y, l.y)
	floats.Scale(gamma, dir)

	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rhoHist[idx] * floats.Dot(l.yHist[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.sHist[idx])
	}

	floats.Scale(-1, dir)
	return 1
}
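For reference, a minimal self-contained sketch of the same two-loop recursion over plain slices, with the ring buffer and optimizer state stripped out. The helper names dot and twoLoop are hypothetical, the (s, y) histories are assumed ordered oldest to newest, and no safeguard against a non-positive s'y is included.

package main

import "fmt"

func dot(a, b []float64) float64 {
	var s float64
	for i := range a {
		s += a[i] * b[i]
	}
	return s
}

// twoLoop computes dir = -H^{-1} g using the L-BFGS two-loop recursion.
// sHist and yHist hold the k most recent (s, y) pairs, oldest first.
func twoLoop(grad []float64, sHist, yHist [][]float64) []float64 {
	k := len(sHist)
	dir := make([]float64, len(grad))
	copy(dir, grad)

	a := make([]float64, k)
	rho := make([]float64, k)
	// First loop: newest to oldest.
	for i := k - 1; i >= 0; i-- {
		rho[i] = 1 / dot(sHist[i], yHist[i])
		a[i] = rho[i] * dot(sHist[i], dir)
		for j := range dir {
			dir[j] -= a[i] * yHist[i][j]
		}
	}
	// Scale by the initial Hessian approximation gamma*I.
	s, y := sHist[k-1], yHist[k-1]
	gamma := dot(s, y) / dot(y, y)
	for j := range dir {
		dir[j] *= gamma
	}
	// Second loop: oldest to newest.
	for i := 0; i < k; i++ {
		beta := rho[i] * dot(yHist[i], dir)
		for j := range dir {
			dir[j] += (a[i] - beta) * sHist[i][j]
		}
	}
	// Negate so the result is a descent direction.
	for j := range dir {
		dir[j] = -dir[j]
	}
	return dir
}

func main() {
	s := [][]float64{{0.5, 0.1}}
	y := [][]float64{{0.4, 0.2}}
	g := []float64{1, 1}
	fmt.Println(twoLoop(g, s, y)) // a descent direction for g
}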
// SetCurrent sets the current value of the Floats.
// Assumes that the length does not change per iteration.
func (f *Floats) SetCurrent(val []float64) {
	copy(f.previous, f.current)
	copy(f.current, val)
	floats.SubTo(f.diff, f.current, f.previous)
	f.norm = floats.Norm(f.current, 2)
}
// Estimate computes the model parameters using sufficient statistics.
func (g *Model) Estimate() error {
	if g.NSamples > minNumSamples {
		// Estimate the mean.
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumx, g.Mean)
		// Estimate the variance:
		// sigma_sq = 1/n (sumxsq - 1/n sumx^2) = 1/n sumxsq - mean^2.
		tmp := g.variance // Borrow as an intermediate array.
		floatx.Sq(g.tmpArray, g.Mean)
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumxsq, tmp)
		floats.SubTo(g.variance, tmp, g.tmpArray)
		floatx.Apply(floatx.Floorv(smallVar), g.variance, nil)
	} else {
		// Not enough training samples.
		glog.Warningf("not enough training samples, name [%s], num samples [%e]", g.ModelName, g.NSamples)
		floatx.Apply(floatx.SetValueFunc(smallVar), g.variance, nil)
		floatx.Apply(floatx.SetValueFunc(0), g.Mean, nil)
	}
	g.setVariance(g.variance) // Update varInv and stddev.

	// Update the log Gaussian constant.
	floatx.Log(g.tmpArray, g.variance)
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0

	glog.V(6).Infof("gaussian reest, name:%s, mean:%v, sd:%v", g.ModelName, g.Mean, g.StdDev)
	return nil
}
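As a quick sanity check of the sufficient-statistics identity in the comment above, sigma_sq = 1/n sumxsq - mean^2, a tiny sketch with hypothetical running sums:

package main

import "fmt"

// Verifies that the (biased) variance computed from the running sums
// sumx = sum(x_i) and sumxsq = sum(x_i^2) matches the usual definition:
// variance = sumxsq/n - mean^2, where mean = sumx/n.
func main() {
	data := []float64{1, 2, 4, 7}
	var sumx, sumxsq float64
	for _, v := range data {
		sumx += v
		sumxsq += v * v
	}
	n := float64(len(data))
	mean := sumx / n
	variance := sumxsq/n - mean*mean
	fmt.Println(mean, variance) // 3.5 5.25
}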
// returnNext updates the location based on the iteration type and the current
// simplex, and returns the next operation.
func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) {
	n.lastIter = iter
	switch iter {
	case nmMajor:
		// Fill loc with the current best point and value, and command a
		// convergence check.
		copy(loc.X, n.vertices[0])
		loc.F = n.values[0]
		return MajorIteration, nil
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		dim := len(loc.X)
		floats.SubTo(loc.X, n.centroid, n.vertices[dim])
		floats.Scale(scale, loc.X)
		floats.Add(loc.X, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, loc.X)
		}
		return FuncEvaluation, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, loc.X)
		floats.Add(loc.X, n.vertices[0])
		return FuncEvaluation, nil
	default:
		panic("unreachable")
	}
}
// returnNext finds the next location to evaluate, stores the location in xNext,
// and returns the evaluation and iteration types.
func (n *NelderMead) returnNext(iter nmIterType, xNext []float64) (EvaluationType, IterationType, error) {
	dim := len(xNext)
	n.lastIter = iter
	switch iter {
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		floats.SubTo(xNext, n.centroid, n.vertices[dim])
		floats.Scale(scale, xNext)
		floats.Add(xNext, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, xNext)
			// Nelder-Mead iterations start with a reflection step.
			return FuncEvaluation, MajorIteration, nil
		}
		return FuncEvaluation, MinorIteration, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(xNext, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, xNext)
		floats.Add(xNext, n.vertices[0])
		return FuncEvaluation, SubIteration, nil
	default:
		panic("unreachable")
	}
}
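Both variants share the update x_new = x_centroid + scale * (x_centroid - x_worst). A minimal sketch of that formula with plain slices; reflect is a hypothetical helper, and the scales shown assume the conventional coefficients (reflection 1, expansion 2, inside contraction -0.5):

package main

import "fmt"

// reflect computes x_new = centroid + scale*(centroid - worst), the update
// shared by the reflection, expansion, and contraction steps above.
func reflect(centroid, worst []float64, scale float64) []float64 {
	xNew := make([]float64, len(centroid))
	for i := range xNew {
		xNew[i] = centroid[i] + scale*(centroid[i]-worst[i])
	}
	return xNew
}

func main() {
	centroid := []float64{1, 1}
	worst := []float64{2, 0}
	fmt.Println(reflect(centroid, worst, 1))    // reflection: [0 2]
	fmt.Println(reflect(centroid, worst, 2))    // expansion: [-1 3]
	fmt.Println(reflect(centroid, worst, -0.5)) // inside contraction: [1.5 0.5]
}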
// LogProb computes the log of the pdf of the point x.
func (n *Normal) LogProb(x []float64) float64 {
	dim := n.dim
	if len(x) != dim {
		panic(badSizeMismatch)
	}
	// Compute the normalization constant.
	c := -0.5*float64(dim)*logTwoPi - n.logSqrtDet

	// Compute (x-mu)' Sigma^-1 (x-mu).
	xMinusMu := make([]float64, dim)
	floats.SubTo(xMinusMu, x, n.mu)
	d := mat64.NewVector(dim, xMinusMu)
	tmp := make([]float64, dim)
	tmpVec := mat64.NewVector(dim, tmp)
	tmpVec.SolveCholeskyVec(n.chol, d)
	return c - 0.5*floats.Dot(tmp, xMinusMu)
}
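For intuition, a minimal sketch of the same log-density formula in the special case of a diagonal covariance, where the solve and log-determinant are available in closed form rather than via a Cholesky factor; logProbDiag is a hypothetical helper, not part of the package:

package main

import (
	"fmt"
	"math"
)

// logProbDiag evaluates log N(x; mu, diag(variance)):
// -0.5*dim*log(2*pi) - 0.5*sum(log var_i) - 0.5*sum((x_i-mu_i)^2/var_i).
func logProbDiag(x, mu, variance []float64) float64 {
	dim := float64(len(x))
	var logDet, quad float64
	for i := range x {
		logDet += math.Log(variance[i])
		d := x[i] - mu[i]
		quad += d * d / variance[i]
	}
	return -0.5*dim*math.Log(2*math.Pi) - 0.5*logDet - 0.5*quad
}

func main() {
	x := []float64{1, 2}
	mu := []float64{0, 0}
	variance := []float64{1, 4}
	fmt.Println(logProbDiag(x, mu, variance)) // ≈ -3.531
}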
// Init initializes the Floats to be ready to optimize by setting the history
// slice to have length zero and setting the current value equal to the
// initial value. This should be called by the optimizer at the beginning of
// the optimization.
func (f *Floats) Init() error {
	f.Hist = f.Hist[:0]
	if f.Initial == nil {
		return errors.New("multivariate: initial slice is nil")
	}
	f.length = len(f.Initial)
	f.diff = make([]float64, len(f.Initial))
	f.current = make([]float64, len(f.Initial))
	f.previous = make([]float64, len(f.Initial))
	for i := range f.previous {
		f.previous[i] = math.Inf(1)
	}
	copy(f.current, f.Initial)
	f.norm = floats.Norm(f.current, 2)
	floats.SubTo(f.diff, f.current, f.previous)
	f.AddToHist(f.Initial)
	return nil
}
// Minus computes the difference between two images.
// Does not modify either input.
func (f *Multi) Minus(g *Multi) *Multi {
	dst := NewMulti(f.Width, f.Height, f.Channels)
	floats.SubTo(dst.Elems, f.Elems, g.Elems)
	return dst
}
func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}

	// Compute the gradient difference in the last step:
	// y = g_{k+1} - g_k
	floats.SubTo(b.y, loc.Gradient, b.grad)
	// Compute the step difference:
	// s = x_{k+1} - x_k
	floats.SubTo(b.s, loc.X, b.x)
	sDotY := floats.Dot(b.s, b.y)
	sDotYSquared := sDotY * sDotY

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, Wright (2006), Numerical Optimization (2nd ed.),
		// page 143, eq. 6.20.
		yDotY := floats.Dot(b.y, b.y)
		scale := sDotY / yDotY
		for i := 0; i < len(loc.X); i++ {
			for j := 0; j < len(loc.X); j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	// Compute the update rule for B_{k+1}^-1.
	// The first term is the existing inverse Hessian, the second term is
	//  (s_k^T y_k + y_k^T B_k^-1 y_k) (s_k s_k^T) / (s_k^T y_k)^2
	// and the third term is
	//  -(B_k^-1 y_k s_k^T + s_k y_k^T B_k^-1) / (s_k^T y_k).
	//
	// y_k^T B_k^-1 y_k is a scalar, and the third term is a rank-two update
	// where B_k^-1 y_k is one vector and s_k is the other. Compute the update
	// values, then actually perform the rank updates.
	yBy := mat64.Inner(b.yVec, b.invHess, b.yVec)
	firstTermConst := (sDotY + yBy) / sDotYSquared
	b.tmpVec.MulVec(b.invHess, b.yVec)
	b.invHess.RankTwo(b.invHess, -1/sDotY, b.tmpVec, b.sVec)
	b.invHess.SymRankOne(b.invHess, firstTermConst, b.sVec)

	// Update the stored BFGS data for the new iteration.
	copy(b.x, loc.X)
	copy(b.grad, loc.Gradient)

	// Compute the new search direction, stored in place.
	d := mat64.NewVector(b.dim, dir)
	g := mat64.NewVector(b.dim, loc.Gradient)
	d.MulVec(b.invHess, g)
	floats.Scale(-1, dir)
	return 1
}
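A minimal dense-matrix sketch of the same inverse-Hessian update, written out with explicit loops instead of the symmetric rank-one and rank-two kernels; bfgsUpdate is a hypothetical helper, and H is assumed symmetric:

package main

import "fmt"

// bfgsUpdate applies the BFGS inverse-Hessian update in place:
// H <- H + ((s'y + y'Hy)/(s'y)^2) s s' - (H y s' + s y' H)/(s'y).
func bfgsUpdate(H [][]float64, s, y []float64) {
	n := len(s)
	var sy float64
	for i := range s {
		sy += s[i] * y[i]
	}
	// Compute Hy and the scalar y'Hy (using symmetry of H, y'H = (Hy)').
	Hy := make([]float64, n)
	var yHy float64
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			Hy[i] += H[i][j] * y[j]
		}
		yHy += y[i] * Hy[i]
	}
	c := (sy + yHy) / (sy * sy)
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			H[i][j] += c*s[i]*s[j] - (Hy[i]*s[j]+s[i]*Hy[j])/sy
		}
	}
}

func main() {
	H := [][]float64{{1, 0}, {0, 1}}
	s := []float64{1, 0}
	y := []float64{2, 0} // consistent with a true Hessian of 2I along s
	bfgsUpdate(H, s, y)
	fmt.Println(H) // [[0.5 0] [0 1]]
}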
// minus returns the element-wise difference a - b.
func minus(a, b []float64) []float64 {
	dst := make([]float64, len(a))
	floats.SubTo(dst, a, b)
	return dst
}
func simplex(initialBasic []int, c []float64, A mat64.Matrix, b []float64, tol float64) (float64, []float64, []int, error) {
	err := verifyInputs(initialBasic, c, A, b)
	if err != nil {
		if err == ErrUnbounded {
			return math.Inf(-1), nil, nil, ErrUnbounded
		}
		return math.NaN(), nil, nil, err
	}
	m, n := A.Dims()

	// There is at least one optimal solution to the LP which is at the
	// intersection of a set of constraint boundaries. For a standard-form LP
	// with n variables and m equality constraints, at least n-m elements of x
	// must equal zero at optimality. The simplex algorithm solves the
	// standard-form LP by starting at an initial constraint vertex and
	// successively moving to adjacent constraint vertices. At every vertex,
	// the set of non-zero x values is the "basic feasible solution". The
	// indices of the non-zero x's are maintained in basicIdxs, the respective
	// columns of A are in ab, and the actual non-zero values of x are in xb.
	//
	// The LP is equality constrained such that A * x = b. This can be expanded to
	//  ab * xb + an * xn = b
	// where ab are the columns of A in the basic set and an are all of the
	// other columns. Since each element of xn is zero by definition, this
	// means that for all feasible solutions xb = ab^-1 * b.
	//
	// Before the simplex algorithm can start, an initial feasible solution
	// must be found. If initialBasic is non-nil a feasible solution has been
	// supplied. Otherwise the "Phase I" problem must be solved to find an
	// initial feasible solution.

	var basicIdxs []int // The indices of the non-zero x values.
	var ab *mat64.Dense // The subset of columns of A listed in basicIdxs.
	var xb []float64    // The non-zero elements of x. xb = ab^-1 b.

	if initialBasic != nil {
		// initialBasic supplied. Panic if incorrect length or infeasible.
		if len(initialBasic) != m {
			panic("lp: incorrect number of initial vectors")
		}
		ab = extractColumns(A, initialBasic)
		xb, err = initializeFromBasic(ab, b)
		if err != nil {
			panic(err)
		}
		basicIdxs = make([]int, len(initialBasic))
		copy(basicIdxs, initialBasic)
	} else {
		// No initial basis supplied. Solve the Phase I problem.
		basicIdxs, ab, xb, err = findInitialBasic(A, b)
		if err != nil {
			return math.NaN(), nil, nil, err
		}
	}

	// basicIdxs contains the indices of an initial feasible solution, ab
	// contains the extracted columns of A, and xb contains the feasible
	// solution. All x not in the basic set are 0 by construction.

	// nonBasicIdx is the set of nonbasic variables.
	nonBasicIdx := make([]int, 0, n-m)
	inBasic := make(map[int]struct{})
	for _, v := range basicIdxs {
		inBasic[v] = struct{}{}
	}
	for i := 0; i < n; i++ {
		if _, ok := inBasic[i]; !ok {
			nonBasicIdx = append(nonBasicIdx, i)
		}
	}

	// cb is the subset of c for the basic variables. an and cn are the
	// equivalents to ab and cb but for the nonbasic variables.
	cb := make([]float64, len(basicIdxs))
	for i, idx := range basicIdxs {
		cb[i] = c[idx]
	}
	cn := make([]float64, len(nonBasicIdx))
	for i, idx := range nonBasicIdx {
		cn[i] = c[idx]
	}
	an := extractColumns(A, nonBasicIdx)

	bVec := mat64.NewVector(len(b), b)
	cbVec := mat64.NewVector(len(cb), cb)

	// Temporary data needed each iteration. (Described later.)
	r := make([]float64, n-m)
	move := make([]float64, m)

	// Solve the linear program starting from the initial feasible set. This
	// is the "Phase II" problem.
	//
	// Algorithm:
	// 1) Compute the "reduced costs" for the non-basic variables. The reduced
	// costs are the Lagrange multipliers of the constraints.
	//  r = cn - an^T * ab^-T * cb
	// 2) If all of the reduced costs are positive, no improvement is possible,
	// and the solution is optimal (xn can only increase because of
	// non-negativity constraints). Otherwise, the solution can be improved and
	// one element will be exchanged in the basic set.
	// 3) Choose the x_n with the most negative value of r. Call this value xe.
	// This variable will be swapped into the basic set.
	// 4) Increase xe until the next constraint boundary is met. This will
	// happen when the first element in xb becomes 0. The distance xe can
	// increase before a given element in xb becomes negative can be found from
	//  xb = Ab^-1 b - Ab^-1 An xn
	//     = Ab^-1 b - Ab^-1 Ae xe
	//     = bhat + d x_e
	//  xe = bhat_i / -d_i
	// where Ae is the column of A corresponding to xe. The constraining basic
	// index is the first index for which this is true, so remove the element
	// which is min_i (bhat_i / -d_i), assuming d_i is negative. If no d_i is
	// less than 0, then the problem is unbounded.
	// 5) If the new xe is 0 (that is, bhat_i == 0), then this location is at
	// the intersection of several constraints. Use the Bland rule instead of
	// the rule in step 4 to avoid cycling.
	for {
		// Compute reduced costs -- r = cn - an^T ab^-T cb.
		var tmp mat64.Vector
		err = tmp.SolveVec(ab.T(), cbVec)
		if err != nil {
			break
		}
		data := make([]float64, n-m)
		tmp2 := mat64.NewVector(n-m, data)
		tmp2.MulVec(an.T(), &tmp)
		floats.SubTo(r, cn, data)

		// Replace the most negative element in the simplex. If there are no
		// negative entries then the optimal solution has been found.
		minIdx := floats.MinIdx(r)
		if r[minIdx] >= -tol {
			break
		}

		for i, v := range r {
			if math.Abs(v) < rRoundTol {
				r[i] = 0
			}
		}

		// Compute the moving distance.
		err = computeMove(move, minIdx, A, ab, xb, nonBasicIdx)
		if err != nil {
			if err == ErrUnbounded {
				return math.Inf(-1), nil, nil, ErrUnbounded
			}
			break
		}

		// Replace the basic index along the tightest constraint.
		replace := floats.MinIdx(move)
		if move[replace] <= 0 {
			replace, minIdx, err = replaceBland(A, ab, xb, basicIdxs, nonBasicIdx, r, move)
			if err != nil {
				if err == ErrUnbounded {
					return math.Inf(-1), nil, nil, ErrUnbounded
				}
				break
			}
		}

		// Replace the constrained basicIdx with the newIdx.
		basicIdxs[replace], nonBasicIdx[minIdx] = nonBasicIdx[minIdx], basicIdxs[replace]
		cb[replace], cn[minIdx] = cn[minIdx], cb[replace]
		tmpCol1 := mat64.Col(nil, replace, ab)
		tmpCol2 := mat64.Col(nil, minIdx, an)
		ab.SetCol(replace, tmpCol2)
		an.SetCol(minIdx, tmpCol1)

		// Compute the new xb.
		xbVec := mat64.NewVector(len(xb), xb)
		err = xbVec.SolveVec(ab, bVec)
		if err != nil {
			break
		}
	}
	// Found the optimum successfully or died trying. The basic variables get
	// their values, and the non-basic variables are all zero.
	opt := floats.Dot(cb, xb)
	xopt := make([]float64, n)
	for i, v := range basicIdxs {
		xopt[v] = xb[i]
	}
	return opt, xopt, basicIdxs, err
}
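A minimal sketch of the ratio test from step 4 in the comment above, assuming bhat and d have already been computed; ratioTest is a hypothetical helper that returns -1 in the unbounded case:

package main

import (
	"fmt"
	"math"
)

// ratioTest picks the leaving index argmin over {i : d_i < 0} of bhat_i / -d_i,
// the first basic variable driven to zero as the entering variable grows.
// It returns -1 when no d_i is negative, i.e. the problem is unbounded.
func ratioTest(bhat, d []float64) int {
	leave := -1
	best := math.Inf(1)
	for i, di := range d {
		if di < 0 {
			if ratio := bhat[i] / -di; ratio < best {
				best = ratio
				leave = i
			}
		}
	}
	return leave
}

func main() {
	bhat := []float64{4, 3, 6}
	d := []float64{-2, -3, 1}
	fmt.Println(ratioTest(bhat, d)) // 1: the ratios are 2, 1, and +Inf
}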
// Minus computes the difference between two images.
// Does not modify either input.
func (f *Image) Minus(g *Image) *Image {
	dst := New(f.Width, f.Height)
	floats.SubTo(dst.Elems, f.Elems, g.Elems)
	return dst
}