func TestChebyshev(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	chebyshev := NewChebyshev()

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
		vectorY = mat64.NewDense(4, 1, []float64{-5, -6, 7, 8})

		Convey("When calculating distance with two vectors", func() {
			result := chebyshev.Distance(vectorX, vectorY)
			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Clone reshapes the receiver, so both vectors become 1x4 row
			// vectors. Copy would keep the receiver's 4x1 shape and copy
			// only the overlapping element, leaving the test vacuous.
			vectorX.Clone(vectorX.T())
			vectorY.Clone(vectorY.T())
			result := chebyshev.Distance(vectorX, vectorY)
			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { chebyshev.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
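// Chebyshev distance is the L-infinity metric, max_i |x_i - y_i|; for the
// vectors above, max(6, 8, 4, 4) = 8. The sketch below is illustrative only,
// not golearn's actual implementation, and assumes equal-length column vectors.
func chebyshevSketch(x, y *mat64.Dense) float64 {
	xr, _ := x.Dims()
	yr, _ := y.Dims()
	if xr != yr {
		panic("chebyshev: dimension mismatch")
	}
	var max float64
	for i := 0; i < xr; i++ {
		if d := math.Abs(x.At(i, 0) - y.At(i, 0)); d > max {
			max = d
		}
	}
	return max
}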
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm.
//
// The weights must have length equal to the number of rows in
// input data matrix x. If cov is nil, then a new matrix with appropriate size will
// be constructed. If cov is not nil, it should have the same number of columns as the
// input data matrix x, and it will be used as the destination for the covariance
// data. Weights must not be negative.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewSymDense(c, nil)
	} else if n := cov.Symmetric(); n != c {
		panic(matrix.ErrShape)
	}

	var xt mat64.Dense
	xt.Clone(x.T())
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(weights) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, weights)
		floats.AddConst(-mean, v)
	}

	if weights == nil {
		// Calculate the normalization factor
		// scaled by the sample size.
		cov.SymOuterK(1/(float64(r)-1), &xt)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range weights {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor
	// scaled by the weighted sample size.
	cov.SymOuterK(1/(floats.Sum(weights)-1), &xt)
	return cov
}
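// A minimal usage sketch for the SymDense version above: three observations
// of two variables, first unweighted, then with per-row observation weights.
// The function name and data are illustrative.
func exampleCovarianceMatrix() {
	x := mat64.NewDense(3, 2, []float64{
		1, 2,
		3, 4,
		5, 7,
	})

	// Unweighted: pass nil weights and let the function allocate cov.
	cov := CovarianceMatrix(nil, x, nil)

	// Weighted: the weights slice must have one entry per row of x.
	weighted := CovarianceMatrix(nil, x, []float64{1, 2, 1})

	fmt.Println(mat64.Formatted(cov))
	fmt.Println(mat64.Formatted(weighted))
}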
func TestCranberra(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	cranberra := NewCranberra()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := cranberra.Distance(vec, vec)
		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(5, 1, []float64{1, 2, 3, 4, 9})
		vectorY = mat64.NewDense(5, 1, []float64{-5, -6, 7, 4, 3})

		Convey("When calculating distance with two vectors", func() {
			result := cranberra.Distance(vectorX, vectorY)
			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Clone reshapes the receivers into 1x5 row vectors; Copy would not.
			vectorX.Clone(vectorX.T())
			vectorY.Clone(vectorY.T())
			result := cranberra.Distance(vectorX, vectorY)
			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { cranberra.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
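// Canberra distance (spelled Cranberra in this codebase) is
// sum_i |x_i - y_i| / (|x_i| + |y_i|), with 0/0 terms treated as zero.
// For the vectors above: 6/6 + 8/8 + 4/10 + 0/8 + 6/12 = 1 + 1 + 0.4 + 0 + 0.5 = 2.9.
// An illustrative sketch (not golearn's implementation), assuming
// equal-length column vectors:
func canberraSketch(x, y *mat64.Dense) float64 {
	xr, _ := x.Dims()
	yr, _ := y.Dims()
	if xr != yr {
		panic("canberra: dimension mismatch")
	}
	var sum float64
	for i := 0; i < xr; i++ {
		num := math.Abs(x.At(i, 0) - y.At(i, 0))
		den := math.Abs(x.At(i, 0)) + math.Abs(y.At(i, 0))
		if den != 0 { // skip 0/0 terms, so identical vectors give 0
			sum += num / den
		}
	}
	return sum
}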
func TestManhattan(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	manhattan := NewManhattan()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := manhattan.Distance(vec, vec)
		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
		vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})

		Convey("When calculating distance with column vectors", func() {
			result := manhattan.Distance(vectorX, vectorY)
			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			// Clone reshapes the receivers into 1x3 row vectors; Copy would not.
			vectorX.Clone(vectorX.T())
			vectorY.Clone(vectorY.T())
			result := manhattan.Distance(vectorX, vectorY)
			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with different dimension matrices", func() {
			vectorX.Clone(vectorX.T())
			So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
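// Manhattan (L1) distance is sum_i |x_i - y_i|; for the vectors above,
// |2-1| + |2-4| + |3-5| = 5. Since the test vectors are column matrices, the
// same number can be computed with gonum's floats package on the raw columns.
// This is an illustrative sketch, not golearn's implementation.
func manhattanSketch(x, y *mat64.Dense) float64 {
	return floats.Distance(
		mat64.Col(nil, 0, x),
		mat64.Col(nil, 0, y),
		1, // the L1 norm
	)
}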
// Activate propagates the given input matrix (with) across the network
// a certain number of times (up to maxIterations).
//
// The with matrix should be size * 1 elements, with only the values
// of input neurons set (everything else should be zero).
//
// If the network is conceptually organised into layers, maxIterations
// should be set to the number of layers.
//
// This function overwrites whatever's stored in its first argument.
func (n *Network) Activate(with *mat64.Dense, maxIterations int) {
	// Add bias and feed to activation
	biasFunc := func(r, c int, v float64) float64 {
		return v + n.biases[r]
	}
	activFunc := func(r, c int, v float64) float64 {
		return n.funcs[r].Forward(v)
	}

	// Main loop. Dense.Mul copies through an internal temporary when the
	// receiver aliases an argument, so updating with in place is safe.
	for i := 0; i < maxIterations; i++ {
		with.Mul(n.weights, with)
		with.Apply(biasFunc, with)
		with.Apply(activFunc, with)
	}
}
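// A self-contained sketch of a single Activate step for a hypothetical
// 3-neuron network, mirroring the Mul/Apply sequence above without needing a
// Network value. The weights, biases, and sigmoid activation are all
// illustrative assumptions.
func activateStepSketch() {
	weights := mat64.NewDense(3, 3, []float64{
		0, 0, 0,
		0.5, 0, 0,
		0, 0.5, 0,
	})
	biases := []float64{0, 0.1, 0.1}
	with := mat64.NewDense(3, 1, []float64{1, 0, 0}) // only input neuron 0 set

	sigmoid := func(v float64) float64 { return 1 / (1 + math.Exp(-v)) }

	// Weighted sum, then bias, then activation, exactly as in Activate.
	with.Mul(weights, with)
	with.Apply(func(r, c int, v float64) float64 { return v + biases[r] }, with)
	with.Apply(func(r, c int, v float64) float64 { return sigmoid(v) }, with)

	fmt.Println(mat64.Formatted(with))
}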
// Factors returns matrices W and H that are non-negative factors of V within the
// specified tolerance and computation limits given initial non-negative solutions Wo
// and Ho.
func Factors(V, Wo, Ho *mat64.Dense, c Config) (W, H *mat64.Dense, ok bool) {
	to := time.Now()

	W = Wo
	H = Ho

	var (
		wr, wc = W.Dims()
		hr, hc = H.Dims()

		tmp mat64.Dense
	)

	var vhT mat64.Dense
	gW := mat64.NewDense(wr, wc, nil)
	tmp.Mul(H, H.T())
	gW.Mul(W, &tmp)
	vhT.Mul(V, H.T())
	gW.Sub(gW, &vhT)

	var wTv mat64.Dense
	gH := mat64.NewDense(hr, hc, nil)
	tmp.Reset()
	tmp.Mul(W.T(), W)
	gH.Mul(&tmp, H)
	wTv.Mul(W.T(), V)
	gH.Sub(gH, &wTv)

	var gHT, gWHT mat64.Dense
	gHT.Clone(gH.T())
	gWHT.Stack(gW, &gHT)
	grad := mat64.Norm(&gWHT, 2)
	tolW := math.Max(0.001, c.Tolerance) * grad
	tolH := tolW

	var (
		_ok  bool
		iter int
	)

	decFiltW := func(r, c int, v float64) float64 {
		// decFiltW is applied to gW, so v = gW.At(r, c).
		if v < 0 || W.At(r, c) > 0 {
			return v
		}
		return 0
	}
	decFiltH := func(r, c int, v float64) float64 {
		// decFiltH is applied to gH, so v = gH.At(r, c).
		if v < 0 || H.At(r, c) > 0 {
			return v
		}
		return 0
	}

	var vT, hT, wT mat64.Dense
	for i := 0; i < c.MaxIter; i++ {
		gW.Apply(decFiltW, gW)
		gH.Apply(decFiltH, gH)

		var proj float64
		for _, v := range gW.RawMatrix().Data {
			proj += v * v
		}
		for _, v := range gH.RawMatrix().Data {
			proj += v * v
		}
		proj = math.Sqrt(proj)
		if proj < c.Tolerance*grad || time.Since(to) > c.Limit {
			break
		}

		vT.Clone(V.T())
		hT.Clone(H.T())
		wT.Clone(W.T())
		W, gW, iter, ok = nnlsSubproblem(&vT, &hT, &wT, tolW, c.MaxOuterSub, c.MaxInnerSub)
		if iter == 0 {
			tolW *= 0.1
		}
		wT.Reset()
		wT.Clone(W.T())
		W = &wT
		var gWT mat64.Dense
		gWT.Clone(gW.T())
		*gW = gWT

		H, gH, iter, _ok = nnlsSubproblem(V, W, H, tolH, c.MaxOuterSub, c.MaxInnerSub)
		ok = ok && _ok
		if iter == 0 {
			tolH *= 0.1
		}
	}

	return W, H, ok
}
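// A usage sketch for Factors: factor a non-negative 4x3 matrix V into W (4x2)
// and H (2x3). The Config field values are arbitrary illustrations; only the
// field names appear in Factors above. Initial factors must be non-negative.
func exampleFactors() {
	V := mat64.NewDense(4, 3, []float64{
		1, 2, 0,
		2, 4, 1,
		0, 1, 3,
		1, 0, 2,
	})
	Wo := mat64.NewDense(4, 2, []float64{
		1, 0.5,
		0.5, 1,
		1, 0.2,
		0.2, 1,
	})
	Ho := mat64.NewDense(2, 3, []float64{
		1, 0.5, 0.2,
		0.2, 1, 0.5,
	})

	W, H, ok := Factors(V, Wo, Ho, Config{
		Tolerance:   1e-3,
		MaxIter:     100,
		Limit:       time.Second,
		MaxOuterSub: 1000,
		MaxInnerSub: 20,
	})
	fmt.Println(ok)
	fmt.Println(mat64.Formatted(W))
	fmt.Println(mat64.Formatted(H))
}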
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm. The matrix returned will be symmetric and square.
//
// The weights wts should have the length equal to the number of rows in
// input data matrix x. If c is nil, then a new matrix with appropriate size will
// be constructed. If c is not nil, it should be a square matrix with the same
// number of columns as the input data matrix x, and it will be used as the receiver
// for the covariance data. Weights cannot be negative.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
	// the returned type from a *mat.Dense to a *mat.Symmetric.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewDense(c, c, nil)
	} else if covr, covc := cov.Dims(); covr != covc || covc != c {
		panic(mat64.ErrShape)
	}

	var xt mat64.Dense
	xt.Clone(x.T())
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(wts) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, wts)
		floats.AddConst(-mean, v)
	}

	var n float64
	if wts == nil {
		n = float64(r)
		cov.Mul(&xt, (&xt).T())

		// Scale by the sample size.
		cov.Scale(1/(n-1), cov)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range wts {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor.
	n = floats.Sum(wts)
	cov.Mul(&xt, (&xt).T())

	// Scale by the sample size.
	cov.Scale(1/(n-1), cov)
	return cov
}
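// The weighted form above agrees with duplicating observations: weighting a
// row by 2 gives the same result as including that row twice with nil
// weights, because the normalization divides by Sum(wts)-1. A quick check of
// that property (function name and data are illustrative):
func exampleWeightsAsCounts() {
	x := mat64.NewDense(3, 2, []float64{
		1, 2,
		3, 4,
		5, 7,
	})
	xDup := mat64.NewDense(4, 2, []float64{
		1, 2,
		3, 4,
		3, 4,
		5, 7,
	})
	a := CovarianceMatrix(nil, x, []float64{1, 2, 1})
	b := CovarianceMatrix(nil, xDup, nil)
	fmt.Println(mat64.EqualApprox(a, b, 1e-12)) // true
}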
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := new(mat64.QR)
	qr.Factorize(explVariables)
	var q, reg mat64.Dense
	q.QFromQR(qr)
	reg.RFromQR(qr)

	var transposed, qty mat64.Dense
	transposed.Clone(q.T())
	qty.Mul(&transposed, observed)

	// Back-substitution solves the triangular system R * beta = Q^T * y.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
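// A usage sketch for Fit under golearn's conventions, assuming
// base.ParseCSVToInstances loads a dataset whose last column is the class
// attribute and NewLinearRegression constructs an unfitted model. The file
// path is illustrative.
func exampleLinearRegressionFit() error {
	inst, err := base.ParseCSVToInstances("exams.csv", true)
	if err != nil {
		return err
	}
	lr := NewLinearRegression()
	if err := lr.Fit(inst); err != nil {
		return err
	}
	// Predictions for new data would then go through lr.Predict.
	return nil
}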