func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion as described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.
	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY
	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}
func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	// Update direction. Uses the two-loop recursion as described in
	// Nocedal, Wright (2006), Numerical Optimization (2nd ed.), chapter 7, page 178.
	copy(dir, loc.Gradient)
	floats.SubTo(l.y, loc.Gradient, l.grad)
	floats.SubTo(l.s, loc.X, l.x)
	copy(l.sHist[l.oldest], l.s)
	copy(l.yHist[l.oldest], l.y)
	sDotY := floats.Dot(l.y, l.s)
	l.rhoHist[l.oldest] = 1 / sDotY
	l.oldest++
	l.oldest = l.oldest % l.Store
	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)

	// Two-loop recursion. The first loop starts with the most recent element
	// and goes backward, the second starts with the oldest element and goes
	// forward. At the end dir contains H^{-1} * g, so flip the direction for
	// minimization.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rhoHist[idx] * floats.Dot(l.sHist[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.yHist[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(l.y, l.y)
	floats.Scale(gamma, dir)

	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rhoHist[idx] * floats.Dot(l.yHist[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.sHist[idx])
	}

	floats.Scale(-1, dir)
	return 1
}
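// A minimal sketch (not from the source above) of the same two-loop recursion
// written against plain slices, assuming only gonum's floats package. The
// history slices sHist and yHist hold the stored (s, y) pairs ordered from
// oldest to newest; all names here are illustrative.
func twoLoop(grad []float64, sHist, yHist [][]float64) []float64 {
	dir := make([]float64, len(grad))
	copy(dir, grad)
	m := len(sHist)
	a := make([]float64, m)
	rho := make([]float64, m)
	for i := range rho {
		rho[i] = 1 / floats.Dot(sHist[i], yHist[i])
	}
	// First loop: newest to oldest.
	for i := m - 1; i >= 0; i-- {
		a[i] = rho[i] * floats.Dot(sHist[i], dir)
		floats.AddScaled(dir, -a[i], yHist[i])
	}
	// Scale by gamma = (s.y)/(y.y) of the most recent pair as the initial Hessian guess.
	if m > 0 {
		gamma := floats.Dot(sHist[m-1], yHist[m-1]) / floats.Dot(yHist[m-1], yHist[m-1])
		floats.Scale(gamma, dir)
	}
	// Second loop: oldest to newest.
	for i := 0; i < m; i++ {
		beta := rho[i] * floats.Dot(yHist[i], dir)
		floats.AddScaled(dir, a[i]-beta, sHist[i])
	}
	// dir now holds an approximation to H^{-1} g; negate it for a descent direction.
	floats.Scale(-1, dir)
	return dir
}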
func (ls *LinesearchMethod) initNextLinesearch(loc *Location, xNext []float64) (EvaluationType, IterationType, error) {
	copy(ls.x, loc.X)

	var stepSize float64
	if ls.first {
		stepSize = ls.NextDirectioner.InitDirection(loc, ls.dir)
		ls.first = false
	} else {
		stepSize = ls.NextDirectioner.NextDirection(loc, ls.dir)
	}

	projGrad := floats.Dot(loc.Gradient, ls.dir)
	if projGrad >= 0 {
		ls.evalType = NoEvaluation
		ls.iterType = NoIteration
		return ls.evalType, ls.iterType, ErrNonNegativeStepDirection
	}

	ls.evalType = ls.Linesearcher.Init(loc.F, projGrad, stepSize)

	floats.AddScaledTo(xNext, ls.x, stepSize, ls.dir)
	// Compare the starting point for the current iteration with the next
	// evaluation point to make sure that rounding errors do not prevent progress.
	if floats.Equal(ls.x, xNext) {
		ls.evalType = NoEvaluation
		ls.iterType = NoIteration
		return ls.evalType, ls.iterType, ErrNoProgress
	}

	ls.iterType = MinorIteration
	return ls.evalType, ls.iterType, nil
}
// initNextLinesearch initializes the next linesearch using the previous
// complete location stored in loc. It fills loc.X and returns an evaluation
// to be performed at loc.X.
func (ls *LinesearchMethod) initNextLinesearch(loc *Location) (Operation, error) {
	copy(ls.x, loc.X)

	var step float64
	if ls.first {
		ls.first = false
		step = ls.NextDirectioner.InitDirection(loc, ls.dir)
	} else {
		step = ls.NextDirectioner.NextDirection(loc, ls.dir)
	}

	projGrad := floats.Dot(loc.Gradient, ls.dir)
	if projGrad >= 0 {
		return ls.error(ErrNonNegativeStepDirection)
	}

	op := ls.Linesearcher.Init(loc.F, projGrad, step)
	if !op.isEvaluation() {
		panic("linesearch: Linesearcher returned invalid operation")
	}

	floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
	if floats.Equal(ls.x, loc.X) {
		// Step size is so small that the next evaluation point is
		// indistinguishable from the starting point for the current iteration
		// due to rounding errors.
		return ls.error(ErrNoProgress)
	}

	ls.lastStep = step
	ls.eval = NoOperation // Invalidate all fields of loc.
	ls.lastOp = op
	return ls.lastOp, nil
}
// Combine takes a weighted sum of the inputs with the weights set by parameters.
// The last element of parameters is the bias term, so len(parameters) = len(inputs) + 1.
func (s SumNeuron) Combine(parameters []float64, inputs []float64) (combination float64) {
	/*
		for i, val := range inputs {
			combination += parameters[i] * val
		}
	*/
	combination = floats.Dot(inputs, parameters[:len(inputs)])
	combination += parameters[len(parameters)-1]
	return
}
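// A hedged usage sketch for Combine above; the SumNeuron value, weights, and
// inputs are illustrative only. With three inputs there are three weights
// followed by one bias, so len(parameters) == len(inputs)+1.
func exampleSumNeuronCombine() {
	var s SumNeuron
	inputs := []float64{1, 2, 3}
	parameters := []float64{0.5, -1, 2, 0.25} // weights, then the bias term
	// combination = 0.5*1 + (-1)*2 + 2*3 + 0.25 = 4.75
	fmt.Println(s.Combine(parameters, inputs))
}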
func (ls *LinesearchMethod) Iterate(loc *Location, xNext []float64) (EvaluationType, IterationType, error) {
	if ls.iterType == SubIteration {
		// We needed to evaluate invalid fields of Location. Now we have them
		// and can announce MajorIteration.
		copy(xNext, loc.X)
		ls.evalType = NoEvaluation
		ls.iterType = MajorIteration
		return ls.evalType, ls.iterType, nil
	}

	if ls.iterType == MajorIteration {
		// The linesearch previously signaled MajorIteration. Since we're here,
		// it means that the previous location is not good enough to converge,
		// so start the next linesearch.
		return ls.initNextLinesearch(loc, xNext)
	}

	projGrad := floats.Dot(loc.Gradient, ls.dir)
	if ls.Linesearcher.Finished(loc.F, projGrad) {
		copy(xNext, loc.X)
		// Check if the last evaluation evaluated all fields of Location.
		ls.evalType = complementEval(loc, ls.evalType)
		if ls.evalType == NoEvaluation {
			// Location is complete and MajorIteration can be announced directly.
			ls.iterType = MajorIteration
		} else {
			// Location is not complete, evaluate its invalid fields in SubIteration.
			ls.iterType = SubIteration
		}
		return ls.evalType, ls.iterType, nil
	}

	// Line search not done, just iterate.
	stepSize, evalType, err := ls.Linesearcher.Iterate(loc.F, projGrad)
	if err != nil {
		ls.evalType = NoEvaluation
		ls.iterType = NoIteration
		return ls.evalType, ls.iterType, err
	}

	floats.AddScaledTo(xNext, ls.x, stepSize, ls.dir)
	// Compare the starting point for the current iteration with the next
	// evaluation point to make sure that rounding errors do not prevent progress.
	if floats.Equal(ls.x, xNext) {
		ls.evalType = NoEvaluation
		ls.iterType = NoIteration
		return ls.evalType, ls.iterType, ErrNoProgress
	}

	ls.evalType = evalType
	ls.iterType = MinorIteration
	return ls.evalType, ls.iterType, nil
}
func (cg *CG) Iterate(ctx *Context) Operation {
	switch cg.resume {
	case 1:
		cg.resume = 2
		return SolvePreconditioner // Solve M z = r_{i-1}

	case 2:
		// ρ_i = r_{i-1} · z
		cg.rho = floats.Dot(ctx.Residual, ctx.Z)
		if !cg.first {
			// β = ρ_i / ρ_{i-1}
			beta := cg.rho / cg.rho1
			// z = z + β p_{i-1}
			floats.AddScaled(ctx.Z, beta, ctx.P)
		}
		cg.first = false
		// p_i = z
		copy(ctx.P, ctx.Z)

		cg.resume = 3
		return ComputeAp // Compute Ap

	case 3:
		// α = ρ_i / (p_i · Ap_i)
		alpha := cg.rho / floats.Dot(ctx.P, ctx.Ap)
		// x_i = x_{i-1} + α p_i
		floats.AddScaled(ctx.X, alpha, ctx.P)
		// r_i = r_{i-1} - α Ap_i
		floats.AddScaled(ctx.Residual, -alpha, ctx.Ap)

		cg.rho1 = cg.rho

		cg.resume = 1
		return CheckConvergence
	}
	panic("unreachable")
}
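// A self-contained sketch (assumed, not from the source above) of the same
// recurrences with the identity preconditioner (M = I), written as a plain
// loop on a dense SPD system A x = b instead of through the Context state
// machine. Only gonum's floats package is used; all names are illustrative.
func cgSolve(a [][]float64, b, x []float64, iters int) {
	n := len(b)
	r := make([]float64, n)
	ap := make([]float64, n)
	// r = b - A x for the supplied starting point.
	for i := 0; i < n; i++ {
		r[i] = b[i] - floats.Dot(a[i], x)
	}
	p := make([]float64, n)
	copy(p, r) // p_1 = z = r_0 when M = I
	rho := floats.Dot(r, r)
	for k := 0; k < iters; k++ {
		for i := 0; i < n; i++ {
			ap[i] = floats.Dot(a[i], p) // Ap
		}
		alpha := rho / floats.Dot(p, ap) // α = ρ_i / (p_i · Ap_i)
		floats.AddScaled(x, alpha, p)    // x_i = x_{i-1} + α p_i
		floats.AddScaled(r, -alpha, ap)  // r_i = r_{i-1} - α Ap_i
		rhoNext := floats.Dot(r, r)
		beta := rhoNext / rho // β = ρ_i / ρ_{i-1}
		for i := range p {
			p[i] = r[i] + beta*p[i] // p_{i+1} = z + β p_i with z = r_i
		}
		rho = rhoNext
	}
}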
// isOrthogonal returns whether the columns of a are pairwise orthogonal
// (it does not check that they have unit length).
func isOrthogonal(a *Dense) bool {
	rows, cols := a.Dims()
	col1 := make([]float64, rows)
	col2 := make([]float64, rows)
	for i := 0; i < cols-1; i++ {
		for j := i + 1; j < cols; j++ {
			a.Col(col1, i)
			a.Col(col2, j)
			dot := floats.Dot(col1, col2)
			if math.Abs(dot) > 1e-14 {
				return false
			}
		}
	}
	return true
}
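// A hedged usage sketch for isOrthogonal above. The columns of a rotation
// matrix are orthogonal, a shear matrix's are not. It assumes the same
// package-local Dense type and NewDense constructor used by isOrthogonal.
func exampleIsOrthogonal() {
	theta := math.Pi / 3
	rot := NewDense(2, 2, []float64{
		math.Cos(theta), -math.Sin(theta),
		math.Sin(theta), math.Cos(theta),
	})
	shear := NewDense(2, 2, []float64{
		1, 1,
		0, 1,
	})
	fmt.Println(isOrthogonal(rot))   // true
	fmt.Println(isOrthogonal(shear)) // false
}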
// LogProb computes the log of the pdf of the point x.
func (n *Normal) LogProb(x []float64) float64 {
	dim := n.dim
	if len(x) != dim {
		panic(badSizeMismatch)
	}
	// Compute the normalization constant.
	c := -0.5*float64(dim)*logTwoPi - n.logSqrtDet

	// Compute (x-mu)' Sigma^-1 (x-mu).
	xMinusMu := make([]float64, dim)
	floats.SubTo(xMinusMu, x, n.mu)
	d := mat64.NewVector(dim, xMinusMu)
	tmp := make([]float64, dim)
	tmpVec := mat64.NewVector(dim, tmp)
	tmpVec.SolveCholeskyVec(n.chol, d)
	return c - 0.5*floats.Dot(tmp, xMinusMu)
}
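// A hedged sketch (not part of the type above) of the same density formula for
// the special case of a diagonal covariance, where Sigma^-1 is element-wise
// division, so no Cholesky solve is needed:
//  log p(x) = -d/2 log(2 pi) - 1/2 log|Sigma| - 1/2 (x-mu)^T Sigma^-1 (x-mu)
func logProbDiagonal(x, mu, variances []float64) float64 {
	d := float64(len(x))
	c := -0.5 * d * math.Log(2*math.Pi)
	var quad, logDet float64
	for i := range x {
		diff := x[i] - mu[i]
		quad += diff * diff / variances[i]
		logDet += math.Log(variances[i])
	}
	return c - 0.5*logDet - 0.5*quad
}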
// Mean returns the Gaussian process prediction of the mean at the location x.
func (g *GP) Mean(x []float64) float64 {
	// y_mean = k_*^T K^-1 y
	// where k_* is the vector of the kernel between the new location and all
	// of the data points,
	// y are the outputs at all the data points, and
	// K^-1 is the inverse of the covariance matrix of the data points
	// (K^-1 y is stored).
	if len(x) != g.inputDim {
		panic(badInputLength)
	}
	nSamples, _ := g.inputs.Dims()

	covariance := make([]float64, nSamples)
	for i := range covariance {
		covariance[i] = g.kernel.Distance(x, g.inputs.RawRowView(i))
	}
	y := floats.Dot(g.sigInvY.RawVector().Data, covariance)
	return y*g.std + g.mean
}
func (l *linesearchFun) ObjGrad(step float64) (f float64, g float64, err error) {
	// Take the step (need to add back in the scaling).
	for i, val := range l.direction {
		l.currLoc[i] = val*step + l.initLoc[i]
	}
	// Copy the location (in case the user-defined function modifies it).
	copy(l.currLocCopy, l.currLoc)
	f, gVec, err := l.fun.ObjGrad(l.currLocCopy)
	if err != nil {
		return f, g, errors.New("linesearch: error during user defined function")
	}
	// Add the function to the history so that it isn't thrown out.
	// Copy the gradient vector (in case Fun modifies it).
	n := copy(l.currGrad, gVec)
	if n != len(l.currLocCopy) {
		return f, g, errors.New("linesearch: user defined function returned incorrect gradient length")
	}
	// Find the gradient in the direction of the search vector.
	g = floats.Dot(l.direction, l.currGrad)
	l.wolfe.SetCurrState(f, g, step)
	return f, g, nil
}
// cosCorrMultiNaive explicitly forms vectors and computes their normalized dot product.
func cosCorrMultiNaive(f, g *rimg64.Multi) *rimg64.Image {
	h := rimg64.New(f.Width-g.Width+1, f.Height-g.Height+1)
	n := g.Width * g.Height * g.Channels
	a := make([]float64, n)
	b := make([]float64, n)
	for i := 0; i < h.Width; i++ {
		for j := 0; j < h.Height; j++ {
			a = a[:0]
			b = b[:0]
			for u := 0; u < g.Width; u++ {
				for v := 0; v < g.Height; v++ {
					for p := 0; p < g.Channels; p++ {
						a = append(a, f.At(i+u, j+v, p))
						b = append(b, g.At(u, v, p))
					}
				}
			}
			floats.Scale(1/floats.Norm(a, 2), a)
			floats.Scale(1/floats.Norm(b, 2), b)
			h.Set(i, j, floats.Dot(a, b))
		}
	}
	return h
}
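// A hedged note on the inner loop above: scaling both vectors to unit norm and
// taking their dot product is the cosine similarity dot(a, b) / (|a| |b|).
// A minimal equivalent helper, assuming only gonum's floats package:
func cosine(a, b []float64) float64 {
	return floats.Dot(a, b) / (floats.Norm(a, 2) * floats.Norm(b, 2))
}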
// Linesearch performs a linesearch. The optimizer should turn off all non-Wolfe
// termination conditions for the gradient and the step.
func Linesearch(multifun common.MultiObjGrad, method LinesearchMethod, settings univariate.GradSettings, wolfe WolfeConditioner, searchVector []float64, initLoc []float64, initObj float64, initGrad []float64) (*LinesearchResult, error) {
	// Linesearch modifies the values of the slices, but should revert the changes by the end.

	// Find the norm of the search direction.
	normSearchVector := floats.Norm(searchVector, 2)

	// Find the search direction (replace this with an input to avoid make?).
	direction := make([]float64, len(searchVector))
	copy(direction, searchVector)
	floats.Scale(1/normSearchVector, direction)

	// Find the initial projection of the gradient into the search direction.
	initDirectionalGrad := floats.Dot(direction, initGrad)

	if initDirectionalGrad > 0 {
		return &LinesearchResult{}, errors.New("initial directional gradient must be negative")
	}

	// Set the Wolfe constants.
	wolfe.SetInitState(initObj, initDirectionalGrad)
	wolfe.SetCurrState(initObj, initDirectionalGrad, 1.0)
	fun := &linesearchFun{
		fun:         multifun,
		wolfe:       wolfe,
		direction:   direction,
		initLoc:     initLoc,
		currLoc:     make([]float64, len(initLoc)),
		currLocCopy: make([]float64, len(initLoc)),
		currGrad:    make([]float64, len(initLoc)),
	}

	settings.Gradient.Initial = initDirectionalGrad
	settings.Objective.Initial = initObj

	stepSettings := method.GetStepSettings()
	stepSettings.InitialStepSize = normSearchVector

	// Run the optimization; the initial location is zero.
	optVal, optLoc, result, err := univariate.OptimizeGrad(fun, 0, settings, method)
	//status, err := common.OptimizeOpter(method, fun)

	// Regenerate the results structure (do this before returning the error in
	// case the optimizer can recover from it). Need to scale alpha_k because
	// the linesearch is over x_k + alpha_k p_k.
	r := &LinesearchResult{
		Loc:  fun.currLoc,
		Obj:  optVal,
		Grad: fun.currGrad,
		Step: optLoc / normSearchVector,
	}

	if err != nil {
		fmt.Println("Error in linesearch")
		return r, errors.New("linesearch: error during linesearch optimization: " + err.Error())
	}
	stat := result.Status

	// Check that the status is due to the Wolfe conditions being met.
	if stat != common.WolfeConditionsMet {
		// If the status wasn't because of the Wolfe conditions, see if they are met anyway.
		c := wolfe.Status()
		if c == common.WolfeConditionsMet {
			// Conditions met, no problem.
			return r, nil
		}
		// Conditions not met.
		return r, errors.New("linesearch: status not because of wolfe conditions")
	}
	return r, nil
}
// computeZ computes the value of z for the given featurized input, feature vector,
// and b value, where sqrt2OverD = math.Sqrt(2.0 / float64(nFeatures)).
func computeZ(featurizedInput, feature []float64, b float64, sqrt2OverD float64) float64 {
	dot := floats.Dot(featurizedInput, feature)
	return sqrt2OverD * math.Cos(dot+b)
}
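// A hedged usage sketch for computeZ above, following the usual random Fourier
// features construction: each feature is a random frequency vector w, b is
// drawn uniformly from [0, 2π), and z = sqrt(2/D) cos(w·x + b). The names
// nFeatures and randomFeatureMap are illustrative; math/rand supplies the draws.
func randomFeatureMap(x []float64, nFeatures int) []float64 {
	sqrt2OverD := math.Sqrt(2.0 / float64(nFeatures))
	z := make([]float64, nFeatures)
	w := make([]float64, len(x))
	for i := range z {
		for j := range w {
			w[j] = rand.NormFloat64() // frequency drawn from a standard Gaussian
		}
		b := rand.Float64() * 2 * math.Pi
		z[i] = computeZ(x, w, b, sqrt2OverD)
	}
	return z
}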
func (ls *LinesearchMethod) Iterate(loc *Location) (Operation, error) {
	switch ls.lastOp {
	case NoOperation:
		// TODO(vladimir-ch): Either Init has not been called, or the caller is
		// trying to resume the optimization run after Iterate previously
		// returned with an error. Decide what is the proper thing to do. See also #125.

	case MajorIteration:
		// The previously updated location did not converge the full
		// optimization. Initialize a new linesearch.
		return ls.initNextLinesearch(loc)

	default:
		// Update the indicator of valid fields of loc.
		ls.eval |= ls.lastOp

		if ls.nextMajor {
			ls.nextMajor = false

			// The Linesearcher previously finished, and the invalid fields of
			// loc have now been validated. Announce MajorIteration.
			ls.lastOp = MajorIteration
			return ls.lastOp, nil
		}
	}

	// Continue the linesearch.

	f := math.NaN()
	if ls.eval&FuncEvaluation != 0 {
		f = loc.F
	}
	projGrad := math.NaN()
	if ls.eval&GradEvaluation != 0 {
		projGrad = floats.Dot(loc.Gradient, ls.dir)
	}
	op, step, err := ls.Linesearcher.Iterate(f, projGrad)
	if err != nil {
		return ls.error(err)
	}

	switch op {
	case MajorIteration:
		// Linesearch has been finished.

		ls.lastOp = complementEval(loc, ls.eval)
		if ls.lastOp == NoOperation {
			// loc is complete, MajorIteration can be declared directly.
			ls.lastOp = MajorIteration
		} else {
			// Declare MajorIteration on the next call to Iterate.
			ls.nextMajor = true
		}

	case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation:
		if step != ls.lastStep {
			// We are moving to a new location, and not, say, evaluating extra
			// information at the current location.

			// Compute the next evaluation point and store it in loc.X.
			floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
			if floats.Equal(ls.x, loc.X) {
				// The step size has become so small that the next evaluation
				// point is indistinguishable from the starting point for the
				// current iteration due to rounding errors.
				return ls.error(ErrNoProgress)
			}
			ls.lastStep = step
			ls.eval = NoOperation // Indicate all invalid fields of loc.
		}
		ls.lastOp = op

	default:
		panic("linesearch: Linesearcher returned invalid operation")
	}

	return ls.lastOp, nil
}
func main() {
	// v is not defined in the original snippet; a small example vector makes it runnable.
	v := []float64{1, 2, 3}
	fmt.Println(floats.Dot(v, v)) // prints 14
}
func simplex(initialBasic []int, c []float64, A mat64.Matrix, b []float64, tol float64) (float64, []float64, []int, error) {
	err := verifyInputs(initialBasic, c, A, b)
	if err != nil {
		if err == ErrUnbounded {
			return math.Inf(-1), nil, nil, ErrUnbounded
		}
		return math.NaN(), nil, nil, err
	}
	m, n := A.Dims()

	// There is at least one optimal solution to the LP which is at the intersection
	// of a set of constraint boundaries. For a standard form LP with n variables
	// and m equality constraints, at least n-m elements of x must equal zero
	// at optimality. The Simplex algorithm solves the standard-form LP by starting
	// at an initial constraint vertex and successively moving to adjacent constraint
	// vertices. At every vertex, the set of non-zero x values is the "basic
	// feasible solution". The list of non-zero x's is maintained in basicIdxs,
	// the respective columns of A are in ab, and the actual non-zero values of
	// x are in xb.
	//
	// The LP is equality constrained such that A * x = b. This can be expanded
	// to
	//  ab * xb + an * xn = b
	// where ab are the columns of A in the basic set, and an are all of the
	// other columns. Since each element of xn is zero by definition, this means
	// that for all feasible solutions xb = ab^-1 * b.
	//
	// Before the simplex algorithm can start, an initial feasible solution must
	// be found. If initialBasic is non-nil a feasible solution has been supplied.
	// Otherwise the "Phase I" problem must be solved to find an initial feasible
	// solution.

	var basicIdxs []int // The indices of the non-zero x values.
	var ab *mat64.Dense // The subset of columns of A listed in basicIdxs.
	var xb []float64    // The non-zero elements of x. xb = ab^-1 b

	if initialBasic != nil {
		// initialBasic supplied. Panic if incorrect length or infeasible.
		if len(initialBasic) != m {
			panic("lp: incorrect number of initial vectors")
		}
		ab = extractColumns(A, initialBasic)
		xb, err = initializeFromBasic(ab, b)
		if err != nil {
			panic(err)
		}
		basicIdxs = make([]int, len(initialBasic))
		copy(basicIdxs, initialBasic)
	} else {
		// No initial basis supplied. Solve the Phase I problem.
		basicIdxs, ab, xb, err = findInitialBasic(A, b)
		if err != nil {
			return math.NaN(), nil, nil, err
		}
	}

	// basicIdxs contains the indexes for an initial feasible solution,
	// ab contains the extracted columns of A, and xb contains the feasible
	// solution. All x not in the basic set are 0 by construction.

	// nonBasicIdx is the set of nonbasic variables.
	nonBasicIdx := make([]int, 0, n-m)
	inBasic := make(map[int]struct{})
	for _, v := range basicIdxs {
		inBasic[v] = struct{}{}
	}
	for i := 0; i < n; i++ {
		_, ok := inBasic[i]
		if !ok {
			nonBasicIdx = append(nonBasicIdx, i)
		}
	}

	// cb is the subset of c for the basic variables. an and cn
	// are the equivalents to ab and cb but for the nonbasic variables.
	cb := make([]float64, len(basicIdxs))
	for i, idx := range basicIdxs {
		cb[i] = c[idx]
	}
	cn := make([]float64, len(nonBasicIdx))
	for i, idx := range nonBasicIdx {
		cn[i] = c[idx]
	}
	an := extractColumns(A, nonBasicIdx)

	bVec := mat64.NewVector(len(b), b)
	cbVec := mat64.NewVector(len(cb), cb)

	// Temporary data needed each iteration. (Described later.)
	r := make([]float64, n-m)
	move := make([]float64, m)

	// Solve the linear program starting from the initial feasible set. This is
	// the "Phase 2" problem.
	//
	// Algorithm:
	// 1) Compute the "reduced costs" for the non-basic variables. The reduced
	// costs are the Lagrange multipliers of the constraints.
	//  r = cn - an^T * ab^-T * cb
	// 2) If all of the reduced costs are positive, no improvement is possible,
	// and the solution is optimal (xn can only increase because of
	// non-negativity constraints). Otherwise, the solution can be improved and
	// one element will be exchanged in the basic set.
	// 3) Choose the x_n with the most negative value of r. Call this value xe.
	// This variable will be swapped into the basic set.
	// 4) Increase xe until the next constraint boundary is met. This will happen
	// when the first element in xb becomes 0. The distance xe can increase before
	// a given element in xb becomes negative can be found from
	//  xb = Ab^-1 b - Ab^-1 An xn
	//     = Ab^-1 b - Ab^-1 Ae xe
	//     = bhat + d x_e
	//  xe = bhat_i / -d_i
	// where Ae is the column of A corresponding to xe.
	// The constraining basic index is the first index for which this is true,
	// so remove the element which is min_i (bhat_i / -d_i), assuming d_i is negative.
	// If no d_i is less than 0, then the problem is unbounded.
	// 5) If the new xe is 0 (that is, bhat_i == 0), then this location is at
	// the intersection of several constraints. Use the Bland rule instead
	// of the rule in step 4 to avoid cycling.
	for {
		// Compute reduced costs -- r = cn - an^T ab^-T cb
		var tmp mat64.Vector
		err = tmp.SolveVec(ab.T(), cbVec)
		if err != nil {
			break
		}
		data := make([]float64, n-m)
		tmp2 := mat64.NewVector(n-m, data)
		tmp2.MulVec(an.T(), &tmp)
		floats.SubTo(r, cn, data)

		// Replace the most negative element in the simplex. If there are no
		// negative entries then the optimal solution has been found.
		minIdx := floats.MinIdx(r)
		if r[minIdx] >= -tol {
			break
		}

		for i, v := range r {
			if math.Abs(v) < rRoundTol {
				r[i] = 0
			}
		}

		// Compute the moving distance.
		err = computeMove(move, minIdx, A, ab, xb, nonBasicIdx)
		if err != nil {
			if err == ErrUnbounded {
				return math.Inf(-1), nil, nil, ErrUnbounded
			}
			break
		}

		// Replace the basic index along the tightest constraint.
		replace := floats.MinIdx(move)
		if move[replace] <= 0 {
			replace, minIdx, err = replaceBland(A, ab, xb, basicIdxs, nonBasicIdx, r, move)
			if err != nil {
				if err == ErrUnbounded {
					return math.Inf(-1), nil, nil, ErrUnbounded
				}
				break
			}
		}

		// Replace the constrained basicIdx with the newIdx.
		basicIdxs[replace], nonBasicIdx[minIdx] = nonBasicIdx[minIdx], basicIdxs[replace]
		cb[replace], cn[minIdx] = cn[minIdx], cb[replace]
		tmpCol1 := mat64.Col(nil, replace, ab)
		tmpCol2 := mat64.Col(nil, minIdx, an)
		ab.SetCol(replace, tmpCol2)
		an.SetCol(minIdx, tmpCol1)

		// Compute the new xb.
		xbVec := mat64.NewVector(len(xb), xb)
		err = xbVec.SolveVec(ab, bVec)
		if err != nil {
			break
		}
	}
	// Found the optimum successfully or died trying. The basic variables get
	// their values, and the non-basic variables are all zero.
	opt := floats.Dot(cb, xb)
	xopt := make([]float64, n)
	for i, v := range basicIdxs {
		xopt[v] = xb[i]
	}
	return opt, xopt, basicIdxs, err
}
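// A hedged usage sketch for simplex above; the problem data are illustrative.
// The inequality-constrained LP
//  min -x1 - 2 x2   subject to   x1 + x2 <= 4,  x1, x2 >= 0
// is put in the standard form min c.x s.t. A x = b, x >= 0 by adding a slack
// variable x3, giving x1 + x2 + x3 = 4. Passing a nil initial basis makes the
// routine solve the Phase I problem first.
func exampleSimplex() {
	c := []float64{-1, -2, 0}
	A := mat64.NewDense(1, 3, []float64{1, 1, 1})
	b := []float64{4}
	opt, xopt, basic, err := simplex(nil, c, A, b, 1e-10)
	fmt.Println(opt, xopt, basic, err) // expect an optimum of -8 at x = (0, 4, 0)
}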
func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	if len(loc.X) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != b.dim {
		panic("bfgs: unexpected size mismatch")
	}

	// Compute the gradient difference in the last step
	// y = g_{k+1} - g_{k}
	floats.SubTo(b.y, loc.Gradient, b.grad)

	// Compute the step difference
	// s = x_{k+1} - x_{k}
	floats.SubTo(b.s, loc.X, b.x)

	sDotY := floats.Dot(b.s, b.y)
	sDotYSquared := sDotY * sDotY

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal and Wright, Numerical Optimization (2nd ed.), page 143, eq. 6.20.
		yDotY := floats.Dot(b.y, b.y)
		scale := sDotY / yDotY
		for i := 0; i < len(loc.X); i++ {
			for j := 0; j < len(loc.X); j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	// Compute the update rule
	//  B_{k+1}^-1
	// The first term is just the existing inverse Hessian.
	// The second term is
	//  (s_k^T y_k + y_k^T B_k^-1 y_k) (s_k s_k^T) / (s_k^T y_k)^2
	// The third term is
	//  B_k^-1 y_k s_k^T + s_k y_k^T B_k^-1
	//
	// y_k^T B_k^-1 y_k is a scalar, and the third term is a rank-two update
	// where B_k^-1 y_k is one vector and s_k is the other. Compute the update
	// values then actually perform the rank updates.
	yBy := mat64.Inner(b.yVec, b.invHess, b.yVec)
	firstTermConst := (sDotY + yBy) / sDotYSquared
	b.tmpVec.MulVec(b.invHess, b.yVec)

	b.invHess.RankTwo(b.invHess, -1/sDotY, b.tmpVec, b.sVec)
	b.invHess.SymRankOne(b.invHess, firstTermConst, b.sVec)

	// Update the stored BFGS data for the new iteration.
	copy(b.x, loc.X)
	copy(b.grad, loc.Gradient)

	// Compute the new search direction.
	d := mat64.NewVector(b.dim, dir)
	g := mat64.NewVector(b.dim, loc.Gradient)

	d.MulVec(b.invHess, g) // new direction stored in place
	floats.Scale(-1, dir)
	return 1
}
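// A hedged note (not in the source above): the three terms correspond to the
// standard BFGS inverse-Hessian update (Nocedal and Wright, eq. 6.17),
//  B_{k+1}^-1 = (I - rho s y^T) B_k^-1 (I - rho y s^T) + rho s s^T,  rho = 1/(s^T y),
// which expands to
//  B_k^-1 + (s^T y + y^T B_k^-1 y)/(s^T y)^2 (s s^T) - (B_k^-1 y s^T + s y^T B_k^-1)/(s^T y).
// In the code, SymRankOne applies the middle term with firstTermConst, and
// RankTwo applies the last term with coefficient -1/sDotY and vectors B_k^-1 y_k and s_k.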
func (lbfgs *Lbfgs) Iterate(loc *multi.Location, obj *uni.Objective, grad *multi.Gradient, fun optimize.MultiObjGrad) (status.Status, error) {
	counter := lbfgs.counter
	q := lbfgs.q
	a := lbfgs.a
	b := lbfgs.b
	rhoHist := lbfgs.rhoHist
	sHist := lbfgs.sHist
	yHist := lbfgs.yHist
	gamma_k := lbfgs.gamma_k
	tmp := lbfgs.tmp
	p_k := lbfgs.p_k
	s_k := lbfgs.s_k
	y_k := lbfgs.y_k
	z := lbfgs.z

	// Calculate the search direction.
	for i, val := range grad.Curr() {
		q[i] = val
	}
	for i := counter - 1; i >= 0; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		floats.Sub(q, tmp)
	}
	for i := lbfgs.NumStore - 1; i >= counter; i-- {
		a[i] = rhoHist[i] * floats.Dot(sHist[i], q)
		copy(tmp, yHist[i])
		floats.Scale(a[i], tmp)
		//fmt.Println(q)
		//fmt.Println(tmp)
		floats.Sub(q, tmp)
	}

	// Assume H_0 is the identity times gamma_k.
	copy(z, q)
	floats.Scale(gamma_k, z)
	// Second loop for the update, going oldest to newest.
	for i := counter; i < lbfgs.NumStore; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}
	for i := 0; i < counter; i++ {
		b[i] = rhoHist[i] * floats.Dot(yHist[i], z)
		copy(tmp, sHist[i])
		floats.Scale(a[i]-b[i], tmp)
		floats.Add(z, tmp)
	}

	lbfgs.a = a
	lbfgs.b = b

	copy(p_k, z)
	floats.Scale(-1, p_k)
	normP_k := floats.Norm(p_k, 2)

	// Perform the line search -- need to find some way to implement this,
	// especially bookkeeping of the function values.
	linesearchResult, err := linesearch.Linesearch(fun, lbfgs.LinesearchMethod, lbfgs.LinesearchSettings, lbfgs.Wolfe, p_k, loc.Curr(), obj.Curr(), grad.Curr())

	// In the future add a check to switch to a different linesearcher?
	if err != nil {
		return status.LinesearchFailure, err
	}
	x_kp1 := linesearchResult.Loc
	f_kp1 := linesearchResult.Obj
	g_kp1 := linesearchResult.Grad
	alpha_k := linesearchResult.Step

	// Update the Hessian estimate.
	copy(s_k, p_k)
	floats.Scale(alpha_k, s_k)

	copy(y_k, g_kp1)
	floats.Sub(y_k, grad.Curr())
	skDotYk := floats.Dot(s_k, y_k)

	// Bookkeep the results.
	stepSize := alpha_k * normP_k
	lbfgs.step.AddToHist(stepSize)
	lbfgs.step.SetCurr(stepSize)
	loc.SetCurr(x_kp1)
	//lbfgs.loc.AddToHist(x_kp1)
	//fmt.Println(lbfgs.loc.GetHist())
	obj.SetCurr(f_kp1)
	grad.SetCurr(g_kp1)

	copy(sHist[counter], s_k)
	copy(yHist[counter], y_k)
	rhoHist[counter] = 1 / skDotYk

	lbfgs.gamma_k = skDotYk / floats.Dot(y_k, y_k)

	lbfgs.counter += 1
	if lbfgs.counter == lbfgs.NumStore {
		lbfgs.counter = 0
	}
	return status.Continue, nil
}