// GradientDescent performs batch gradient descent for linear regression and
// returns the fitted parameter vector theta.
func GradientDescent(X *mat64.Dense, y *mat64.Vector, alpha, tolerance float64, maxIters int) *mat64.Vector {
	// m = number of training examples
	// n = number of features
	m, n := X.Dims()
	h := mat64.NewVector(m, nil)
	partials := mat64.NewVector(n, nil)
	newTheta := mat64.NewVector(n, nil)

Regression:
	for i := 0; i < maxIters; i++ {
		// Calculate the partial derivatives, reusing h to hold the
		// scaled residuals (h - y) / m.
		h.MulVec(X, newTheta)
		for el := 0; el < m; el++ {
			val := (h.At(el, 0) - y.At(el, 0)) / float64(m)
			h.SetVec(el, val)
		}
		partials.MulVec(X.T(), h)

		// Update the theta values.
		for el := 0; el < n; el++ {
			newVal := newTheta.At(el, 0) - (alpha * partials.At(el, 0))
			newTheta.SetVec(el, newVal)
		}

		// Check the "distance" to the local minimum.
		dist := math.Sqrt(mat64.Dot(partials, partials))
		if dist <= tolerance {
			break Regression
		}
	}
	return newTheta
}
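A minimal usage sketch for GradientDescent, with hypothetical data and shown as if the function were defined in the same main package. The caller is assumed to prepend a bias column of ones to X; the learning rate, tolerance, and iteration cap are illustrative values only.

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
)

func main() {
	// Four training examples; the first column is a bias term fixed at 1.
	X := mat64.NewDense(4, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
		1, 4,
	})
	// Targets generated from y = 0.5 + 2x.
	y := mat64.NewVector(4, []float64{2.5, 4.5, 6.5, 8.5})

	// alpha = 0.01, tolerance = 1e-6, at most 100000 iterations.
	theta := GradientDescent(X, y, 0.01, 1e-6, 100000)
	fmt.Printf("theta: [%.3f %.3f]\n", theta.At(0, 0), theta.At(1, 0))
}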
func (self *Layer) Update(learningConfiguration LearningConfiguration) {
	var deltas mat64.Dense
	deltas.Mul(self.Deltas, self.Input)
	// Operate on a view that excludes the last row of the weight matrix.
	rows, cols := self.Weight.Dims()
	weight := self.Weight.View(0, 0, rows-1, cols).(*mat64.Dense)
	if *learningConfiguration.Decay > 0 {
		var decay mat64.Dense
		decay.Scale(*learningConfiguration.Decay, weight)
		deltas.Sub(&deltas, decay.T())
	}
	deltas.Scale(*learningConfiguration.Rate, &deltas)
	weight.Sub(weight, deltas.T())
}
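Written out, the step Update performs on the weight view W (the weight matrix minus its last row) is, with η = *learningConfiguration.Rate, λ = *learningConfiguration.Decay, Δ = self.Deltas, and X = self.Input:

	W ← W − η · ((Δ·X)ᵀ − λ·W)

so the weight-decay term is folded into the same scaled update as the gradient term.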
// Back propagates the error and updates the weights.
func (m *Mind) Back(input *mat64.Dense, output *mat64.Dense) {
	ErrorOutputLayer := &mat64.Dense{}
	DeltaOutputLayer := &mat64.Dense{}
	HiddenOutputChanges := &mat64.Dense{}
	DeltaHiddenLayer := &mat64.Dense{}
	InputHiddenChanges := &mat64.Dense{}

	ErrorOutputLayer.Sub(output, m.Results.OutputResult)
	DeltaOutputLayer.MulElem(m.ActivatePrime(m.Results.OutputSum), ErrorOutputLayer)

	HiddenOutputChanges.Mul(m.Results.HiddenResult.T(), DeltaOutputLayer)
	HiddenOutputChanges.Scale(m.LearningRate, HiddenOutputChanges)
	m.Weights.HiddenOutput.Add(HiddenOutputChanges, m.Weights.HiddenOutput)

	DeltaHiddenLayer.Mul(DeltaOutputLayer, m.Weights.HiddenOutput.T())
	DeltaHiddenLayer.MulElem(m.ActivatePrime(m.Results.HiddenSum), DeltaHiddenLayer)

	InputHiddenChanges.Mul(input.T(), DeltaHiddenLayer)
	InputHiddenChanges.Scale(m.LearningRate, InputHiddenChanges)
	m.Weights.InputHidden.Add(InputHiddenChanges, m.Weights.InputHidden)
}
// Back propagates the error and updates the weights.
func (m *Mind) Back(input *mat64.Dense, output *mat64.Dense) {
	// Use an empty Dense so Sub can size the receiver; a fixed 1x1
	// receiver would panic for larger output layers.
	ErrorOutputLayer := &mat64.Dense{}
	ErrorOutputLayer.Sub(output, m.Results.OutputResult)

	DeltaOutputLayer := m.ActivatePrime(m.Results.OutputSum)
	DeltaOutputLayer.MulElem(DeltaOutputLayer, ErrorOutputLayer)

	HiddenOutputChanges := &mat64.Dense{}
	HiddenOutputChanges.Product(m.Results.HiddenResult.T(), DeltaOutputLayer)
	HiddenOutputChanges.Scale(m.LearningRate, HiddenOutputChanges)
	m.Weights.HiddenOutput.Add(m.Weights.HiddenOutput, HiddenOutputChanges)

	DeltaHiddenLayer := &mat64.Dense{}
	DeltaHiddenLayer.Product(DeltaOutputLayer, m.Weights.HiddenOutput.T())
	DeltaHiddenLayer.MulElem(DeltaHiddenLayer, m.ActivatePrime(m.Results.HiddenSum))

	InputHiddenChanges := &mat64.Dense{}
	InputHiddenChanges.Product(input.T(), DeltaHiddenLayer)
	InputHiddenChanges.Scale(m.LearningRate, InputHiddenChanges)
	m.Weights.InputHidden.Add(m.Weights.InputHidden, InputHiddenChanges)
}
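Both listings implement the same two-layer backpropagation step. With η = m.LearningRate, f′ = m.ActivatePrime, and ⊙ denoting element-wise multiplication:

	δ_out    = f′(OutputSum) ⊙ (output − OutputResult)
	ΔW_ho    = η · HiddenResultᵀ · δ_out
	δ_hidden = (δ_out · W_hoᵀ) ⊙ f′(HiddenSum)
	ΔW_ih    = η · inputᵀ · δ_hidden

Each ΔW is added to the corresponding weight matrix. Note that both versions apply ΔW_ho before computing δ_hidden, so the hidden-layer delta is formed from the freshly updated hidden-to-output weights.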
func TestChebyshev(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	chebyshev := NewChebyshev()

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
		vectorY = mat64.NewDense(4, 1, []float64{-5, -6, 7, 8})

		Convey("When calculating distance with two vectors", func() {
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Rebuild the vectors in row orientation.
			vectorX = mat64.DenseCopyOf(vectorX.T())
			vectorY = mat64.DenseCopyOf(vectorY.T())
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { chebyshev.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
func TestCranberra(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	cranberra := NewCranberra()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := cranberra.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(5, 1, []float64{1, 2, 3, 4, 9})
		vectorY = mat64.NewDense(5, 1, []float64{-5, -6, 7, 4, 3})

		Convey("When calculating distance with two vectors", func() {
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Rebuild the vectors in row orientation.
			vectorX = mat64.DenseCopyOf(vectorX.T())
			vectorY = mat64.DenseCopyOf(vectorY.T())
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { cranberra.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
func TestManhattan(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	manhattan := NewManhattan()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := manhattan.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
		vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})

		Convey("When calculating distance with column vectors", func() {
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Rebuild the vectors in row orientation.
			vectorX = mat64.DenseCopyOf(vectorX.T())
			vectorY = mat64.DenseCopyOf(vectorY.T())
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
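The metrics exercised above share the signature Distance(x, y *mat64.Dense) float64 and panic on mismatched dimensions. For reference, a minimal sketch of one such metric; the ManhattanSketch type is hypothetical, not the library's implementation, and the package name is assumed from the tests.

package pairwise

import (
	"math"

	"github.com/gonum/matrix/mat64"
)

// ManhattanSketch is a hypothetical stand-in for the metric types above.
type ManhattanSketch struct{}

// Distance returns the L1 (taxicab) distance between two equally-shaped
// matrices, panicking on a dimension mismatch as the tests expect.
func (ManhattanSketch) Distance(x, y *mat64.Dense) float64 {
	xr, xc := x.Dims()
	yr, yc := y.Dims()
	if xr != yr || xc != yc {
		panic("pairwise: dimension mismatch")
	}
	var sum float64
	for i := 0; i < xr; i++ {
		for j := 0; j < xc; j++ {
			sum += math.Abs(x.At(i, j) - y.At(i, j))
		}
	}
	return sum
}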
// Factors returns matrices W and H that are non-negative factors of V within the
// specified tolerance and computation limits given initial non-negative solutions
// Wo and Ho.
func Factors(V, Wo, Ho *mat64.Dense, c Config) (W, H *mat64.Dense, ok bool) {
	to := time.Now()

	W = Wo
	H = Ho

	var (
		wr, wc = W.Dims()
		hr, hc = H.Dims()

		tmp mat64.Dense
	)

	var vhT mat64.Dense
	gW := mat64.NewDense(wr, wc, nil)
	tmp.Mul(H, H.T())
	gW.Mul(W, &tmp)
	vhT.Mul(V, H.T())
	gW.Sub(gW, &vhT)

	var wTv mat64.Dense
	gH := mat64.NewDense(hr, hc, nil)
	tmp.Reset()
	tmp.Mul(W.T(), W)
	gH.Mul(&tmp, H)
	wTv.Mul(W.T(), V)
	gH.Sub(gH, &wTv)

	var gHT, gWHT mat64.Dense
	gHT.Clone(gH.T())
	gWHT.Stack(gW, &gHT)

	grad := mat64.Norm(&gWHT, 2)
	tolW := math.Max(0.001, c.Tolerance) * grad
	tolH := tolW

	var (
		_ok  bool
		iter int
	)

	decFiltW := func(r, c int, v float64) float64 {
		// decFiltW is applied to gW, so v = gW.At(r, c).
		if v < 0 || W.At(r, c) > 0 {
			return v
		}
		return 0
	}
	decFiltH := func(r, c int, v float64) float64 {
		// decFiltH is applied to gH, so v = gH.At(r, c).
		if v < 0 || H.At(r, c) > 0 {
			return v
		}
		return 0
	}

	var vT, hT, wT mat64.Dense
	for i := 0; i < c.MaxIter; i++ {
		gW.Apply(decFiltW, gW)
		gH.Apply(decFiltH, gH)

		var proj float64
		for _, v := range gW.RawMatrix().Data {
			proj += v * v
		}
		for _, v := range gH.RawMatrix().Data {
			proj += v * v
		}
		proj = math.Sqrt(proj)
		if proj < c.Tolerance*grad || time.Since(to) > c.Limit {
			break
		}

		vT.Clone(V.T())
		hT.Clone(H.T())
		wT.Clone(W.T())
		W, gW, iter, ok = nnlsSubproblem(&vT, &hT, &wT, tolW, c.MaxOuterSub, c.MaxInnerSub)
		if iter == 0 {
			tolW *= 0.1
		}

		wT.Reset()
		wT.Clone(W.T())
		W = &wT

		var gWT mat64.Dense
		gWT.Clone(gW.T())
		*gW = gWT

		H, gH, iter, _ok = nnlsSubproblem(V, W, H, tolH, c.MaxOuterSub, c.MaxInnerSub)
		ok = ok && _ok
		if iter == 0 {
			tolH *= 0.1
		}
	}

	return W, H, ok
}
func nnlsSubproblem(V, W, Ho *mat64.Dense, tol float64, outer, inner int) (H, G *mat64.Dense, i int, ok bool) {
	H = new(mat64.Dense)
	H.Clone(Ho)

	var WtV, WtW mat64.Dense
	WtV.Mul(W.T(), V)
	WtW.Mul(W.T(), W)

	alpha, beta := 1., 0.1

	decFilt := func(r, c int, v float64) float64 {
		// decFilt is applied to G, so v = G.At(r, c).
		if v < 0 || H.At(r, c) > 0 {
			return v
		}
		return 0
	}

	G = new(mat64.Dense)
	for i = 0; i < outer; i++ {
		G.Mul(&WtW, H)
		G.Sub(G, &WtV)
		G.Apply(decFilt, G)

		if mat64.Norm(G, 2) < tol {
			break
		}

		var (
			reduce bool
			Hp     *mat64.Dense
			d, dQ  mat64.Dense
		)
		for j := 0; j < inner; j++ {
			var Hn mat64.Dense
			Hn.Scale(alpha, G)
			Hn.Sub(H, &Hn)
			Hn.Apply(posFilt, &Hn)

			d.Sub(&Hn, H)
			dQ.Mul(&WtW, &d)
			dQ.MulElem(&dQ, &d)
			d.MulElem(G, &d)

			sufficient := 0.99*mat64.Sum(&d)+0.5*mat64.Sum(&dQ) < 0

			if j == 0 {
				reduce = !sufficient
				Hp = H
			}

			if reduce {
				if sufficient {
					H = &Hn
					ok = true
					break
				} else {
					alpha *= beta
				}
			} else {
				if !sufficient || mat64.Equal(Hp, &Hn) {
					H = Hp
					break
				} else {
					alpha /= beta
					Hp = &Hn
				}
			}
		}
	}

	return H, G, i, ok
}
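A minimal same-package usage sketch for Factors, with hypothetical data and limits. The Config fields shown are exactly those referenced in the code above (Tolerance, MaxIter, MaxOuterSub, MaxInnerSub, Limit); their use here is an assumption about the type, and the package name nmf is assumed.

package nmf

import (
	"fmt"
	"time"

	"github.com/gonum/matrix/mat64"
)

func exampleFactors() {
	// A 4x3 non-negative matrix to factor as V ≈ W * H with inner rank 2.
	V := mat64.NewDense(4, 3, []float64{
		1, 2, 3,
		2, 4, 6,
		3, 3, 3,
		1, 0, 1,
	})

	// Initial non-negative guesses for the factors.
	Wo := mat64.NewDense(4, 2, []float64{
		0.5, 0.5,
		0.5, 0.5,
		0.5, 0.5,
		0.5, 0.5,
	})
	Ho := mat64.NewDense(2, 3, []float64{
		0.5, 0.5, 0.5,
		0.5, 0.5, 0.5,
	})

	W, H, ok := Factors(V, Wo, Ho, Config{
		Tolerance:   1e-4,
		MaxIter:     200,
		MaxOuterSub: 100,
		MaxInnerSub: 20,
		Limit:       10 * time.Second,
	})
	fmt.Println(ok)
	fmt.Println(mat64.Formatted(W))
	fmt.Println(mat64.Formatted(H))
}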
func (self *Layer) BackwardOutput(values *mat64.Dense, errorFunction ErrorFunction) {
	// TODO(ariw): ErrorFunction Delta use goes here.
	values.Sub(self.Output, values)
	self.Deltas.MulElem(values.T(), self.Derivatives)
}
func simplex(initialBasic []int, c []float64, A mat64.Matrix, b []float64, tol float64) (float64, []float64, []int, error) {
	err := verifyInputs(initialBasic, c, A, b)
	if err != nil {
		if err == ErrUnbounded {
			return math.Inf(-1), nil, nil, ErrUnbounded
		}
		return math.NaN(), nil, nil, err
	}
	m, n := A.Dims()

	// There is at least one optimal solution to the LP which is at the
	// intersection of a set of constraint boundaries. For a standard form LP
	// with n variables and m equality constraints, at least n-m elements of x
	// must equal zero at optimality. The simplex algorithm solves the
	// standard-form LP by starting at an initial constraint vertex and
	// successively moving to adjacent constraint vertices. At every vertex,
	// the set of non-zero x values is the "basic feasible solution". The list
	// of non-zero x's is maintained in basicIdxs, the respective columns of A
	// are in ab, and the actual non-zero values of x are in xb.
	//
	// The LP is equality constrained such that A * x = b. This can be expanded
	// to
	//  ab * xb + an * xn = b
	// where ab are the columns of A in the basic set, and an are all of the
	// other columns. Since each element of xn is zero by definition, this
	// means that for all feasible solutions xb = ab^-1 * b.
	//
	// Before the simplex algorithm can start, an initial feasible solution
	// must be found. If initialBasic is non-nil a feasible solution has been
	// supplied. Otherwise the "Phase I" problem must be solved to find an
	// initial feasible solution.

	var basicIdxs []int // The indices of the non-zero x values.
	var ab *mat64.Dense // The subset of columns of A listed in basicIdxs.
	var xb []float64    // The non-zero elements of x. xb = ab^-1 b

	if initialBasic != nil {
		// initialBasic supplied. Panic if incorrect length or infeasible.
		if len(initialBasic) != m {
			panic("lp: incorrect number of initial vectors")
		}
		ab = extractColumns(A, initialBasic)
		xb, err = initializeFromBasic(ab, b)
		if err != nil {
			panic(err)
		}
		basicIdxs = make([]int, len(initialBasic))
		copy(basicIdxs, initialBasic)
	} else {
		// No initial basis supplied. Solve the Phase I problem.
		basicIdxs, ab, xb, err = findInitialBasic(A, b)
		if err != nil {
			return math.NaN(), nil, nil, err
		}
	}

	// basicIdxs contains the indices of an initial feasible solution,
	// ab contains the extracted columns of A, and xb contains the feasible
	// solution. All x not in the basic set are 0 by construction.

	// nonBasicIdx is the set of nonbasic variables.
	nonBasicIdx := make([]int, 0, n-m)
	inBasic := make(map[int]struct{})
	for _, v := range basicIdxs {
		inBasic[v] = struct{}{}
	}
	for i := 0; i < n; i++ {
		_, ok := inBasic[i]
		if !ok {
			nonBasicIdx = append(nonBasicIdx, i)
		}
	}

	// cb is the subset of c for the basic variables. an and cn
	// are the equivalents to ab and cb but for the nonbasic variables.
	cb := make([]float64, len(basicIdxs))
	for i, idx := range basicIdxs {
		cb[i] = c[idx]
	}
	cn := make([]float64, len(nonBasicIdx))
	for i, idx := range nonBasicIdx {
		cn[i] = c[idx]
	}
	an := extractColumns(A, nonBasicIdx)

	bVec := mat64.NewVector(len(b), b)
	cbVec := mat64.NewVector(len(cb), cb)

	// Temporary data needed each iteration. (Described later.)
	r := make([]float64, n-m)
	move := make([]float64, m)

	// Solve the linear program starting from the initial feasible set. This
	// is the "Phase II" problem.
	//
	// Algorithm:
	// 1) Compute the "reduced costs" for the non-basic variables. The reduced
	// costs are the Lagrange multipliers of the constraints.
	//  r = cn - an^T * ab^-T * cb
	// 2) If all of the reduced costs are positive, no improvement is possible,
	// and the solution is optimal (xn can only increase because of
	// non-negativity constraints). Otherwise, the solution can be improved and
	// one element will be exchanged in the basic set.
	// 3) Choose the x_n with the most negative value of r. Call this value xe.
	// This variable will be swapped into the basic set.
	// 4) Increase xe until the next constraint boundary is met. This will
	// happen when the first element in xb becomes 0. The distance xe can
	// increase before a given element in xb becomes negative can be found from
	//  xb = Ab^-1 b - Ab^-1 An xn
	//     = Ab^-1 b - Ab^-1 Ae xe
	//     = bhat + d x_e
	//  xe = bhat_i / - d_i
	// where Ae is the column of A corresponding to xe. The constraining basic
	// index is the first index for which this is true, so remove the element
	// which is min_i (bhat_i / -d_i), assuming d_i is negative. If no d_i is
	// less than 0, then the problem is unbounded.
	// 5) If the new xe is 0 (that is, bhat_i == 0), then this location is at
	// the intersection of several constraints. Use the Bland rule instead of
	// the rule in step 4 to avoid cycling.

	for {
		// Compute reduced costs -- r = cn - an^T ab^-T cb.
		var tmp mat64.Vector
		err = tmp.SolveVec(ab.T(), cbVec)
		if err != nil {
			break
		}
		data := make([]float64, n-m)
		tmp2 := mat64.NewVector(n-m, data)
		tmp2.MulVec(an.T(), &tmp)
		floats.SubTo(r, cn, data)

		// Replace the most negative element in the simplex. If there are no
		// negative entries then the optimal solution has been found.
		minIdx := floats.MinIdx(r)
		if r[minIdx] >= -tol {
			break
		}

		for i, v := range r {
			if math.Abs(v) < rRoundTol {
				r[i] = 0
			}
		}

		// Compute the moving distance.
		err = computeMove(move, minIdx, A, ab, xb, nonBasicIdx)
		if err != nil {
			if err == ErrUnbounded {
				return math.Inf(-1), nil, nil, ErrUnbounded
			}
			break
		}

		// Replace the basic index along the tightest constraint.
		replace := floats.MinIdx(move)
		if move[replace] <= 0 {
			replace, minIdx, err = replaceBland(A, ab, xb, basicIdxs, nonBasicIdx, r, move)
			if err != nil {
				if err == ErrUnbounded {
					return math.Inf(-1), nil, nil, ErrUnbounded
				}
				break
			}
		}

		// Replace the constrained basicIdx with the newIdx.
		basicIdxs[replace], nonBasicIdx[minIdx] = nonBasicIdx[minIdx], basicIdxs[replace]
		cb[replace], cn[minIdx] = cn[minIdx], cb[replace]
		tmpCol1 := mat64.Col(nil, replace, ab)
		tmpCol2 := mat64.Col(nil, minIdx, an)
		ab.SetCol(replace, tmpCol2)
		an.SetCol(minIdx, tmpCol1)

		// Compute the new xb.
		xbVec := mat64.NewVector(len(xb), xb)
		err = xbVec.SolveVec(ab, bVec)
		if err != nil {
			break
		}
	}

	// Found the optimum successfully or died trying. The basic variables get
	// their values, and the non-basic variables are all zero.
	opt := floats.Dot(cb, xb)
	xopt := make([]float64, n)
	for i, v := range basicIdxs {
		xopt[v] = xb[i]
	}
	return opt, xopt, basicIdxs, err
}
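A small same-package usage sketch for simplex, on a hypothetical LP. The problem must already be in standard form (minimize cᵀx subject to A·x = b, x ≥ 0), which is why slack variables appear explicitly; passing nil for initialBasic triggers the Phase I solve described above.

package lp

import (
	"fmt"
	"log"

	"github.com/gonum/matrix/mat64"
)

func exampleSimplex() {
	// Minimize -x1 - 2*x2 subject to
	//  x1 + s1 = 3
	//  x2 + s2 = 2
	// with all variables non-negative (s1, s2 are slack variables).
	c := []float64{-1, -2, 0, 0}
	A := mat64.NewDense(2, 4, []float64{
		1, 0, 1, 0,
		0, 1, 0, 1,
	})
	b := []float64{3, 2}

	opt, x, basic, err := simplex(nil, c, A, b, 1e-10)
	if err != nil {
		log.Fatal(err)
	}
	// Expected optimum: x = [3 2 0 0] with objective value -7.
	fmt.Println(opt, x, basic)
}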
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size.
	_, rows := inst.Size()

	// Validate class Attribute count.
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes.
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1
	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications.
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see
	// http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix.
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables.
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set the intercept to 1.0.
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := new(mat64.QR)
	qr.Factorize(explVariables)
	var q, reg mat64.Dense
	q.QFromQR(qr)
	reg.RFromQR(qr)

	var transposed, qty mat64.Dense
	transposed.Clone(q.T())
	qty.Mul(&transposed, observed)

	// Back-substitute through the upper-triangular R to recover the
	// regression coefficients.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
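A minimal usage sketch for the regression above. The NewLinearRegression constructor, the Predict method, the linear_models package name, and the CSV path are assumptions for illustration; base.ParseCSVToInstances is golearn's CSV loader.

package linear_models

import (
	"fmt"
	"log"

	"github.com/sjwhitworth/golearn/base"
)

func exampleLinearRegression() {
	// Load a numeric dataset; the file path is illustrative only.
	data, err := base.ParseCSVToInstances("datasets/exams.csv", true)
	if err != nil {
		log.Fatal(err)
	}

	lr := NewLinearRegression()
	if err := lr.Fit(data); err != nil {
		log.Fatal(err)
	}

	predictions, err := lr.Predict(data)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(predictions)
}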