func (lr *LinearRegression) Fit(inst *base.Instances) error {
	if inst.Rows < inst.GetAttributeCount() {
		return NotEnoughDataError
	}

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(inst.Rows, 1, nil)
	explVariables := mat64.NewDense(inst.Rows, inst.GetAttributeCount(), nil)

	for i := 0; i < inst.Rows; i++ {
		observed.Set(i, 0, inst.Get(i, inst.ClassIndex)) // Set observed data
		for j := 0; j < inst.GetAttributeCount(); j++ {
			if j == 0 {
				// Set the intercept column to 1.0
				// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
				explVariables.Set(i, 0, 1.0)
			} else {
				explVariables.Set(i, j, inst.Get(i, j-1))
			}
		}
	}

	n := inst.GetAttributeCount()
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back-substitution: solve the upper-triangular system R * beta = Q^T * y.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true

	return nil
}
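The closing loop in Fit is a standard back-substitution: since X = QR with R upper triangular, the least-squares coefficients solve R·β = Qᵀ·y, and each βᵢ is recovered from the coefficients below it. A minimal standalone sketch of the same solve, assuming the gonum/matrix mat64 API used above (mat64.QR, Q(), R(), TCopy):

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
)

func main() {
	// Three exact observations of y = 1 + 2x, with an intercept column of 1s.
	x := mat64.NewDense(3, 2, []float64{
		1, 0,
		1, 1,
		1, 2,
	})
	y := mat64.NewDense(3, 1, []float64{1, 3, 5})

	// Factor X = QR and form Q^T * y, exactly as Fit does.
	qr := mat64.QR(x)
	q := qr.Q()
	r := qr.R()

	var qt, qty mat64.Dense
	qt.TCopy(q)
	qty.Mul(&qt, y)

	// Back-substitution through the upper-triangular R: for each row i,
	// beta[i] = (qty[i] - sum over j>i of R[i][j]*beta[j]) / R[i][i].
	n := 2
	beta := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		beta[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			beta[i] -= beta[j] * r.At(i, j)
		}
		beta[i] /= r.At(i, i)
	}

	fmt.Println(beta) // [1 2]: intercept 1, slope 2
}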
func TestChebyshev(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	chebyshev := NewChebyshev()

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
		vectorY = mat64.NewDense(4, 1, []float64{-5, -6, 7, 8})

		Convey("When calculating distance with two vectors", func() {
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with matrices of different dimensions", func() {
			vectorX.TCopy(vectorX)

			So(func() { chebyshev.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
		})
	})
}
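The pinned value of 8 follows from the Chebyshev (L∞) metric: the distance is the maximum absolute per-component difference, here max(|1−(−5)|, |2−(−6)|, |3−7|, |4−8|) = max(6, 8, 4, 4) = 8. A minimal plain-slice sketch of that computation (not golearn's mat64-backed implementation):

package main

import (
	"fmt"
	"math"
)

// chebyshev returns the L∞ distance between equal-length vectors:
// the maximum absolute per-component difference.
func chebyshev(x, y []float64) float64 {
	max := 0.0
	for i := range x {
		if d := math.Abs(x[i] - y[i]); d > max {
			max = d
		}
	}
	return max
}

func main() {
	fmt.Println(chebyshev([]float64{1, 2, 3, 4}, []float64{-5, -6, 7, 8})) // 8
}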
func TestCranberra(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	cranberra := NewCranberra()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := cranberra.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(5, 1, []float64{1, 2, 3, 4, 9})
		vectorY = mat64.NewDense(5, 1, []float64{-5, -6, 7, 4, 3})

		Convey("When calculating distance with two vectors", func() {
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with matrices of different dimensions", func() {
			vectorX.TCopy(vectorX)

			So(func() { cranberra.Distance(vectorX, vectorY) }, ShouldPanic)
		})
	})
}
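The expected 2.9 is the Canberra metric (spelled "Cranberra" in golearn's identifiers), which sums |xᵢ − yᵢ| / (|xᵢ| + |yᵢ|) over the components; a 0/0 component is conventionally treated as 0. Worked through for the test vectors: 6/6 + 8/8 + 4/10 + 0/8 + 6/12 = 1 + 1 + 0.4 + 0 + 0.5 = 2.9.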
func TestManhattan(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	manhattan := NewManhattan()

	Convey("Given two vectors that are the same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := manhattan.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
		vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})

		Convey("When calculating distance with column vectors", func() {
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with matrices of different dimensions", func() {
			vectorX.TCopy(vectorX)

			So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
		})
	})
}
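Here the expectation is the Manhattan (L1, taxicab) metric: the sum of absolute per-component differences, |2−1| + |2−4| + |3−5| = 1 + 2 + 2 = 5.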
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm. The matrix returned will be symmetric and square.
//
// The weights wts should have length equal to the number of rows in the
// input data matrix x. If cov is nil, then a new matrix of the appropriate
// size will be constructed. If cov is not nil, it should be a square matrix
// with the same number of columns as the input data matrix x, and it will
// be used as the receiver for the covariance data. Weights cannot be negative.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
	// the returned type from a *mat64.Dense to a *mat64.Symmetric.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewDense(c, c, nil)
	} else if covr, covc := cov.Dims(); covr != covc || covc != c {
		panic(mat64.ErrShape)
	}

	var xt mat64.Dense
	xt.TCopy(x)

	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(wts) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, wts)
		floats.AddConst(-mean, v)
	}

	var n float64
	if wts == nil {
		n = float64(r)

		cov.MulTrans(&xt, false, &xt, true)

		// Scale by n - 1 for the unbiased estimate.
		cov.Scale(1/(n-1), cov)

		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range wts {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}

	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor.
	n = floats.Sum(wts)

	cov.MulTrans(&xt, false, &xt, true)

	// Scale by n - 1 for the unbiased estimate.
	cov.Scale(1/(n-1), cov)

	return cov
}
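A small usage sketch, assuming this is the gonum stat.CovarianceMatrix with the signature shown above; passing nil for both cov and wts lets the function allocate the result and use the unweighted estimate:

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
	"github.com/gonum/stat"
)

func main() {
	// Three observations of two variables; the second is 10x the first.
	x := mat64.NewDense(3, 2, []float64{
		1, 10,
		2, 20,
		3, 30,
	})

	// nil receiver and nil weights: a new 2x2 result is allocated and
	// normalized by n - 1.
	cov := stat.CovarianceMatrix(nil, x, nil)

	fmt.Println(mat64.Formatted(cov))
	// Sample variances are 1 and 100, and the cross-covariance is 10:
	// ⎡ 1   10⎤
	// ⎣10  100⎦
}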
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set the intercept column to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back-substitution: solve the upper-triangular system R * beta = Q^T * y.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]

	return nil
}
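End to end, the refactored Fit slots into golearn's usual load/fit/predict flow. A hedged sketch, assuming golearn's linear_models package and a hypothetical numeric CSV (exams.csv) whose class column holds the dependent variable:

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/linear_models"
)

func main() {
	// Hypothetical dataset: float attribute columns plus one class column.
	inst, err := base.ParseCSVToInstances("exams.csv", true)
	if err != nil {
		panic(err)
	}

	lr := linear_models.NewLinearRegression()
	if err := lr.Fit(inst); err != nil {
		// e.g. NotEnoughDataError when rows < float attributes + 1
		panic(err)
	}

	predictions, err := lr.Predict(inst)
	if err != nil {
		panic(err)
	}
	fmt.Println(predictions)
}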