// getScore returns the linear score of a sample under the given model,
// i.e. the dot product of the model weights and the sample feature values.
func (lr *LROWLQN) getScore(model *core.Vector, sample *core.Sample) float64 {
	var score float64 = 0
	for _, fea := range sample.Features {
		score += model.GetValue(fea.Id) * fea.Value
	}
	return score
}
// BackTrackingLineSearch shrinks the step size alpha until a sufficient
// decrease in cost is obtained, starting from the current cost, position,
// gradient and search direction.
func (h *QuasiNewtonHelper) BackTrackingLineSearch(cost float64, pos *core.Vector, grad *core.Vector, dir *core.Vector, isInit bool) (nextCost float64, nextPos *core.Vector) {
	dotGradDir := grad.Dot(dir)
	if dotGradDir == 0 {
		return cost, pos
	}
	if dotGradDir > 0 {
		panic("BackTracking: to the opposite direction of grad")
	}
	alpha := 1.0
	backoff := 0.5
	if isInit {
		normDir := math.Sqrt(dir.Dot(dir))
		alpha = 1 / normDir
		backoff = 0.1
	}
	var c1 float64 = 1e-4
	for cntItr := 0; cntItr <= MAX_BACKTRACKING_ITER; cntItr++ {
		nextPos = h.minimizer.NextPoint(pos, dir, alpha)
		nextCost = h.minimizer.Evaluate(nextPos)
		if nextCost <= cost+c1*dotGradDir*alpha {
			break
		}
		alpha *= backoff
	}
	return nextCost, nextPos
}
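// Note added for clarity (not in the original source): the acceptance test
// above is the Armijo sufficient-decrease condition. With f the objective,
// d the search direction and g the gradient at pos, a step alpha is kept when
//
//	f(pos + alpha*d) <= f(pos) + c1 * alpha * (g . d)
//
// where c1 = 1e-4 and g.d < 0 because d must be a descent direction. On each
// rejection alpha is multiplied by backoff (0.5, or 0.1 on the first
// iteration, where alpha also starts at 1/||d||).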
// fixDirSign zeroes any component of the quasi-Newton direction whose sign
// disagrees with the steepest-descent (pseudo-gradient) direction, as
// required by OWL-QN.
func (m *OWLQNMinimizer) fixDirSign(dir *core.Vector, steepestDescDir *core.Vector) {
	if m.l1reg == 0 {
		return
	}
	for key, val := range dir.Data {
		if val*steepestDescDir.GetValue(key) <= 0 {
			dir.SetValue(key, 0)
		}
	}
}
// Cov computes the squared-exponential ARD covariance between x1 and x2,
// scaling each dimension by its radius before accumulating the squared
// distance.
func (cov_func *CovSEARD) Cov(x1 *core.Vector, x2 *core.Vector) float64 {
	ret := 0.0
	tmp := 0.0
	for key, r := range cov_func.Radiuses.Data {
		v1 := x1.GetValue(key)
		v2 := x2.GetValue(key)
		tmp = (v1 - v2) / r
		ret += tmp * tmp
	}
	ret = cov_func.Amp * math.Exp(-ret)
	return ret
}
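// Note added for clarity (not in the original source): in formula form the
// covariance computed above is
//
//	k(x1, x2) = Amp * exp( -sum_i ((x1_i - x2_i) / r_i)^2 )
//
// where r_i are the per-dimension Radiuses. Any factor of 1/2 that the
// textbook squared-exponential ARD kernel carries in the exponent is assumed
// here to be absorbed into the radiuses.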
// NextPoint takes a step of size alpha along dir and, when L1 regularization
// is active, projects the result back onto the orthant of the current point
// by zeroing coordinates that changed sign.
func (m *OWLQNMinimizer) NextPoint(curPos *core.Vector, dir *core.Vector, alpha float64) *core.Vector {
	if owlqn_output_switch {
		fmt.Printf(".")
	}
	newPos := curPos.ElemWiseMultiplyAdd(dir, alpha)
	if m.l1reg > 0 {
		for key, val := range curPos.Data {
			if val*newPos.GetValue(key) < 0 {
				newPos.SetValue(key, 0)
			}
		}
	}
	return newPos
}
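// Note added for clarity (not in the original source): the zeroing above is
// the OWL-QN orthant projection. Writing x for curPos and d for dir,
//
//	newPos_i = 0                  if x_i * (x_i + alpha*d_i) < 0
//	newPos_i = x_i + alpha*d_i    otherwise
//
// so no coordinate is allowed to cross zero within a single line-search step.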
// UpdateState pushes the latest (position, gradient) pair into the history,
// evicting the oldest entry once numHist pairs are stored.
// Description: the pos and gradient arguments should NOT be modified outside.
func (h *QuasiNewtonHelper) UpdateState(nextPos *core.Vector, nextGrad *core.Vector) (isOptimal bool) {
	if int64(len(h.sList)) >= h.numHist {
		h.sList = h.sList[1:]
		h.yList = h.yList[1:]
		h.roList = h.roList[1:]
	}
	newS := nextPos.ElemWiseMultiplyAdd(h.curPos, -1)
	newY := nextGrad.ElemWiseMultiplyAdd(h.curGrad, -1)
	ro := newS.Dot(newY)
	h.sList = append(h.sList, newS)
	h.yList = append(h.yList, newY)
	h.roList = append(h.roList, ro)
	h.curPos = nextPos
	h.curGrad = nextGrad
	return ro == 0
}
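// Note added for clarity (not in the original source): with x_k, g_k the
// current position and gradient, the history entries appended above are
//
//	s_k  = x_{k+1} - x_k
//	y_k  = g_{k+1} - g_k
//	ro_k = s_k . y_k
//
// so roList stores the curvature product s.y itself, not its reciprocal as
// some L-BFGS write-ups define rho. ro == 0 is treated as "no further
// progress is possible", hence the isOptimal return value.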
// Equals reports whether two sparse vectors hold exactly the same values,
// treating two nil vectors as equal.
func (lr *LROWLQN) Equals(x *core.Vector, y *core.Vector) bool {
	if y == nil && x == nil {
		return true
	}
	if y == nil || x == nil {
		return false
	}
	for key, val := range x.Data {
		if y.GetValue(key) != val {
			return false
		}
	}
	for key, val := range y.Data {
		if x.GetValue(key) != val {
			return false
		}
	}
	return true
}
// Description: assume all the features in x also appear in grad.
// updateGrad turns grad into the OWL-QN pseudo-gradient of the
// L1-regularized objective at x.
func (m *OWLQNMinimizer) updateGrad(x *core.Vector, grad *core.Vector) {
	if m.l1reg == 0 {
		return
	}
	for key, val := range grad.Data {
		xval := x.GetValue(key)
		if xval < 0 {
			grad.SetValue(key, val-m.l1reg)
		} else if xval > 0 {
			grad.SetValue(key, val+m.l1reg)
		} else {
			if val < -m.l1reg {
				grad.SetValue(key, val+m.l1reg)
			} else if val > m.l1reg {
				grad.SetValue(key, val-m.l1reg)
			}
		}
	}
}
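// Note added for clarity (not in the original source): for the objective
// F(x) = loss(x) + l1reg*||x||_1 and g the gradient of loss at x, the loop
// above produces the pseudo-gradient
//
//	pg_i = g_i - l1reg    if x_i < 0
//	pg_i = g_i + l1reg    if x_i > 0
//	pg_i = g_i + l1reg    if x_i = 0 and g_i < -l1reg
//	pg_i = g_i - l1reg    if x_i = 0 and g_i >  l1reg
//	pg_i = g_i            if x_i = 0 and |g_i| <= l1reg (left unchanged here;
//	                      the textbook OWL-QN pseudo-gradient sets this to 0)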
// Description: Update the dir from -grad to optimal direction
// Dir will be modified directly
func (h *QuasiNewtonHelper) ApplyQuasiInverseHession(dir *core.Vector) {
	count := len(h.sList)
	if count == 0 {
		return
	}
	alphas := make([]float64, count)
	for n := count - 1; n >= 0; n-- {
		alphas[n] = -dir.Dot(h.sList[n]) / h.roList[n]
		dir.ApplyElemWiseMultiplyAccumulation(h.yList[n], alphas[n])
	}
	lastY := h.yList[count-1]
	yDotY := lastY.Dot(lastY)
	scalar := h.roList[count-1] / yDotY
	dir.ApplyScale(scalar)
	for n := 0; n < count; n++ {
		beta := dir.Dot(h.yList[n]) / h.roList[n]
		dir.ApplyElemWiseMultiplyAccumulation(h.sList[n], -alphas[n]-beta)
	}
}
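// Note added for clarity (not in the original source): this is the standard
// L-BFGS two-loop recursion, written with ro_k = s_k.y_k and with alpha's
// sign flipped relative to the usual presentation. Starting from q = dir:
//
//	first loop (newest to oldest):  alpha_k = -(s_k . q) / ro_k
//	                                q <- q + alpha_k * y_k
//	initial Hessian scaling:        q <- q * (s_m . y_m) / (y_m . y_m)
//	second loop (oldest to newest): beta_k = (y_k . q) / ro_k
//	                                q <- q + (-alpha_k - beta_k) * s_k
//
// which matches the textbook update q <- q + (alpha_k - beta_k)*s_k once the
// flipped sign of alpha_k is taken into account.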
// Minimize runs L-BFGS from init until the relative-improvement criterion,
// the iteration limit, or a zero-curvature update terminates the search.
func (m *LBFGSMinimizer) Minimize(costfun DiffFunction, init *core.Vector) *core.Vector {
	m.costFun = costfun
	var cost float64 = costfun.Value(init)
	var grad *core.Vector = costfun.Gradient(init).Copy()
	var pos *core.Vector = init.Copy()
	var terminalCriterion *relativeMeanImprCriterion = NewRelativeMeanImprCriterion(m.tolerance)
	terminalCriterion.addCost(cost)
	var helper *QuasiNewtonHelper = NewQuasiNewtonHelper(m.numHist, m, pos, grad)
	if lbfgs_output_switch {
		fmt.Println("Iter\tcost\timprovement")
		fmt.Printf("%d\t%e\tUndefined", 0, cost)
	}
	for iter := 1; iter <= m.maxIteration; iter++ {
		dir := grad.Copy()
		dir.ApplyScale(-1.0)
		helper.ApplyQuasiInverseHession(dir)
		newCost, newPos := helper.BackTrackingLineSearch(cost, pos, grad, dir, iter == 1)
		if lbfgs_output_switch {
			fmt.Println("")
		}
		if cost == newCost {
			break
		}
		cost = newCost
		pos = newPos
		grad = costfun.Gradient(pos).Copy()
		terminalCriterion.addCost(cost)
		if lbfgs_output_switch {
			fmt.Printf("%d\t%e\t%e", iter, newCost, terminalCriterion.improvement)
		}
		if terminalCriterion.isTerminable() || helper.UpdateState(pos, grad) {
			if lbfgs_output_switch {
				fmt.Println("")
			}
			break
		}
	}
	return pos
}
// updateValueGrad evaluates the logistic negative log-likelihood at pos over
// the dataset and caches the position, cost, and gradient for reuse.
func (lr *LROWLQN) updateValueGrad(pos *core.Vector, dataset *core.DataSet) {
	var totalLoss float64 = 0.0
	var grad *core.Vector = core.NewVector()
	for _, sample := range dataset.Samples {
		var score float64 = lr.getScore(pos, sample)
		var signScore float64 = score
		if sample.Label == 0 {
			signScore = -score
		}
		var prob float64
		var lnProb float64
		if signScore < -30 {
			prob = 0
			lnProb = signScore
		} else if signScore > 30 {
			prob = 1
			lnProb = 0
		} else {
			prob = 1.0 / (1.0 + math.Exp(-signScore))
			lnProb = math.Log(prob)
		}
		var scale float64
		if sample.Label == 0 {
			scale = (1 - prob)
		} else {
			scale = -(1 - prob)
		}
		totalLoss += -lnProb
		for _, fea := range sample.Features {
			grad.AddValue(fea.Id, scale*fea.Value)
		}
	}
	lr.lastPos = pos.Copy()
	lr.lastCost = totalLoss
	lr.lastGrad = grad
}
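// Note added for clarity (not in the original source): per sample, with
// y = +1 when Label != 0 and y = -1 when Label == 0, the loop above
// accumulates the logistic loss and its gradient
//
//	loss           = -ln sigma(y * score),  sigma(t) = 1 / (1 + exp(-t))
//	d loss / d w_j = -(1 - sigma(y * score)) * y * x_j
//
// which matches scale = (1 - prob) for Label == 0 and -(1 - prob) otherwise.
// The |signScore| > 30 branches guard the sigmoid and its logarithm against
// extreme scores, using ln sigma(t) ~= t for very negative t.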
// Minimize runs OWL-QN from init: each iteration builds the pseudo-gradient
// steepest-descent direction, applies the quasi-Newton update, constrains the
// direction's signs, and line-searches with the orthant projection in NextPoint.
func (m *OWLQNMinimizer) Minimize(costfun DiffFunction, init *core.Vector) *core.Vector {
	m.costFun = costfun
	var cost float64 = m.Evaluate(init)
	var grad *core.Vector = costfun.Gradient(init).Copy()
	var pos *core.Vector = init.Copy()
	var terminalCriterion *relativeMeanImprCriterion = NewRelativeMeanImprCriterion(m.tolerance)
	terminalCriterion.addCost(cost)
	var helper *QuasiNewtonHelper = NewQuasiNewtonHelper(m.numHist, m, pos, grad)
	if owlqn_output_switch {
		fmt.Println("Iter\tcost\timprovement")
		fmt.Printf("%d\t%e\tUndefined", 0, cost)
	}
	for iter := 1; iter <= m.maxIteration; iter++ {
		// customized steepest-descent direction (pseudo-gradient)
		steepestDescDir := grad.Copy()
		m.updateGrad(pos, steepestDescDir)
		steepestDescDir.ApplyScale(-1.0)
		dir := steepestDescDir.Copy()
		// quasi-Newton direction
		helper.ApplyQuasiInverseHession(dir)
		m.fixDirSign(dir, steepestDescDir)
		// customized gradient for the new position
		potentialGrad := grad.Copy()
		m.updateGradForNewPos(pos, potentialGrad, dir)
		newCost, newPos := helper.BackTrackingLineSearch(cost, pos, potentialGrad, dir, iter == 1)
		if owlqn_output_switch {
			fmt.Println("")
		}
		if cost == newCost {
			break
		}
		cost = newCost
		pos = newPos
		grad = costfun.Gradient(pos).Copy()
		terminalCriterion.addCost(cost)
		if owlqn_output_switch {
			fmt.Printf("%d\t%e\t%e", iter, newCost, terminalCriterion.improvement)
		}
		if terminalCriterion.isTerminable() || helper.UpdateState(pos, grad) {
			if owlqn_output_switch {
				fmt.Println("")
			}
			break
		}
	}
	return pos
}
/*
Given matrix m and vector v, compute inv(m)*v.
Based on Gibbs and MacKay 1997, and Mark N. Gibbs's PhD dissertation.

Details:
	A     - positive semidefinite matrix
	u     - a vector
	theta - a positive number
	C     = A + I*theta

Returns inv(C)*u. In effect this supplies the diagonal noise term needed for a
covariance matrix; because the algorithm is numerically stable, the noise term
can be very small and the inversion can still be computed.
*/
func (algo *GaussianProcess) ApproximateInversion(A *core.Matrix, u *core.Vector, theta float64, dim int64) *core.Vector {
	max_itr := 500
	tol := 0.01

	C := core.NewMatrix()
	for key, val := range A.Data {
		C.Data[key] = val.Copy()
	}

	// Add theta to the diagonal elements
	for i := int64(0); i < dim; i++ {
		_, ok := C.Data[i]
		if !ok {
			C.Data[i] = core.NewVector()
		}
		C.Data[i].Data[i] = C.Data[i].Data[i] + theta
	}

	var Q_l float64
	var Q_u float64
	var dQ float64
	u_norm := u.Dot(u) / 2

	// Lower bound
	y_l := core.NewVector()
	g_l := u.Copy()
	h_l := u.Copy()
	lambda_l := float64(0)
	gamma_l := float64(0)
	var tmp_f1 float64
	var tmp_f2 float64
	var tmp_v1 *core.Vector
	tmp_f1 = g_l.Dot(g_l)
	tmp_v1 = C.MultiplyVector(h_l)

	// Upper bound
	y_u := core.NewVector()
	g_u := u.Copy()
	h_u := u.Copy()
	lambda_u := float64(0)
	gamma_u := float64(0)
	var tmp_f3 float64
	var tmp_f4 float64
	var tmp_v3 *core.Vector
	var tmp_v4 *core.Vector
	tmp_v3 = g_u.MultiplyMatrix(A)
	tmp_v4 = C.MultiplyVector(h_u)
	tmp_f3 = tmp_v1.Dot(g_u)

	for i := 0; i < max_itr; i++ {
		// Lower bound
		lambda_l = tmp_f1 / h_l.Dot(tmp_v1)
		y_l.AddVector(h_l, lambda_l) // y_l next
		Q_l = y_l.Dot(u) - 0.5*(y_l.MultiplyMatrix(C)).Dot(y_l)

		// Upper bound
		lambda_u = tmp_f3 / tmp_v3.Dot(tmp_v4)
		y_u.AddVector(h_u, lambda_u) // y_u next
		Q_u = (y_u.MultiplyMatrix(A)).Dot(u) - 0.5*((y_u.MultiplyMatrix(C)).MultiplyMatrix(A)).Dot(y_u)

		dQ = (u_norm-Q_u)/theta - Q_l
		if dQ < tol {
			break
		}

		// Lower bound variable updates
		g_l.AddVector(tmp_v1, -lambda_l) // g_l next
		tmp_f2 = g_l.Dot(g_l)
		gamma_l = tmp_f2 / tmp_f1
		for key, val := range h_l.Data {
			h_l.SetValue(key, val*gamma_l)
		}
		h_l.AddVector(g_l, 1)          // h_l next
		tmp_f1 = tmp_f2                // tmp_f1 next
		tmp_v1 = C.MultiplyVector(h_l) // tmp_v1 next

		// Upper bound variable updates
		g_u.AddVector(tmp_v4, -lambda_u) // g_u next
		tmp_v3 = g_u.MultiplyMatrix(A)   // tmp_v3 next
		tmp_f4 = tmp_v3.Dot(g_u)
		gamma_u = tmp_f4 / tmp_f3
		for key, val := range h_u.Data {
			h_u.SetValue(key, val*gamma_u)
		}
		h_u.AddVector(g_u, 1)          // h_u next
		tmp_v4 = C.MultiplyVector(h_u) // tmp_v4 next
		tmp_f3 = tmp_f4                // tmp_f3 next
	}

	return y_l
}
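// Note added for clarity (not in the original source): following Gibbs and
// MacKay, the loop grows a conjugate-gradient style lower bound Q_l (via y_l)
// and an upper bound (via y_u) that together bracket the quantity
// u'.inv(C).u / 2 with C = A + theta*I, and it stops once the bracket width
//
//	dQ = (u'.u/2 - Q_u)/theta - Q_l
//
// drops below tol; y_l is then returned as the approximation to inv(C)*u.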
// NextPoint returns curPos + alpha*dir; unlike the OWL-QN variant it applies
// no projection.
func (m *LBFGSMinimizer) NextPoint(curPos *core.Vector, dir *core.Vector, alpha float64) *core.Vector {
	if lbfgs_output_switch {
		fmt.Printf(".")
	}
	return curPos.ElemWiseMultiplyAdd(dir, alpha)
}
// Description: assume all the features in x also appear in grad, and all the
// features in dir are in grad.
// updateGradForNewPos computes the pseudo-gradient used during the line
// search, breaking ties at x_i = 0 by the sign of the search direction.
func (m *OWLQNMinimizer) updateGradForNewPos(x *core.Vector, grad *core.Vector, dir *core.Vector) {
	if m.l1reg == 0 {
		return
	}
	for key, val := range grad.Data {
		xval := x.GetValue(key)
		if xval < 0 {
			grad.SetValue(key, val-m.l1reg)
		} else if xval > 0 {
			grad.SetValue(key, val+m.l1reg)
		} else {
			dirval := dir.GetValue(key)
			if dirval < 0 {
				grad.SetValue(key, val-m.l1reg)
			} else if dirval > 0 {
				grad.SetValue(key, val+m.l1reg)
			}
		}
	}
}
// Kernel returns an exponential similarity between x and y,
// exp(-NormL2(x-y) / 20).
func (c *KNN) Kernel(x, y *core.Vector) float64 {
	z := x.Copy()
	z.AddVector(y, -1.0)
	ret := math.Exp(-1.0 * z.NormL2() / 20.0)
	return ret
}
// Distance returns the distance between x and y, computed as NormL2(x - y).
func Distance(x, y *core.Vector) float64 {
	z := x.Copy()
	z.AddVector(y, -1)
	d := z.NormL2()
	return d
}
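// A minimal usage sketch (not part of the original source), assuming it sits
// in the same package as KNN and Distance, that KNN needs no configuration
// before calling Kernel, and that core.Vector is built with NewVector and
// SetValue as elsewhere in this code base.
func ExampleKernelAndDistance() {
	x := core.NewVector()
	x.SetValue(1, 1.0)
	x.SetValue(2, 2.0)

	y := core.NewVector()
	y.SetValue(1, 0.5)
	y.SetValue(2, 1.5)

	knn := &KNN{}
	fmt.Println(knn.Kernel(x, y)) // similarity in (0, 1]; larger means closer
	fmt.Println(Distance(x, y))   // NormL2 of x - y; 0 only when x equals y
}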