Exemple #1
0
// Fit estimates the intercept and the slope coefficients of the model from
// inst. The design matrix is factored with a QR decomposition and the
// resulting triangular system is solved by back substitution.
//
// It returns NotEnoughDataError when inst has fewer rows than attributes.
// On success the intercept is stored in lr.disturbance, the remaining
// coefficients in lr.regressionCoefficients, and lr.fitted is set to true.
func (lr *LinearRegression) Fit(inst *base.Instances) error {
	if inst.Rows < inst.GetAttributeCount() {
		return NotEnoughDataError
	}

	// y is the dependent variable (observed results) and design holds the
	// explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	y := mat64.NewDense(inst.Rows, 1, nil)
	design := mat64.NewDense(inst.Rows, inst.GetAttributeCount(), nil)

	for row := 0; row < inst.Rows; row++ {
		y.Set(row, 0, inst.Get(row, inst.ClassIndex))

		for col := 0; col < inst.GetAttributeCount(); col++ {
			// Column 0 is the constant intercept term (1.0); every other
			// column col is copied from instance column col-1.
			// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
			// NOTE(review): instance columns are taken positionally without
			// consulting inst.ClassIndex - presumably the class is expected
			// to be the last column; confirm against callers.
			value := 1.0
			if col > 0 {
				value = inst.Get(row, col-1)
			}
			design.Set(row, col, value)
		}
	}

	n := inst.GetAttributeCount()
	decomp := mat64.QR(design)
	q, r := decomp.Q(), decomp.R()

	// qTy = transpose(Q) * y, the right-hand side of the triangular system.
	var qT, qTy mat64.Dense
	qT.TCopy(q)
	qTy.Mul(&qT, y)

	// Back substitution: solve R * coeffs = qTy starting from the last row.
	coeffs := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		acc := qTy.At(i, 0)
		for j := i + 1; j < n; j++ {
			acc -= coeffs[j] * r.At(i, j)
		}
		coeffs[i] = acc / r.At(i, i)
	}

	lr.disturbance = coeffs[0]
	lr.regressionCoefficients = coeffs[1:]
	lr.fitted = true

	return nil
}
Exemple #2
0
func TestChebyshev(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	chebyshev := NewChebyshev()

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
		vectorY = mat64.NewDense(4, 1, []float64{-5, -6, 7, 8})

		Convey("When calculating distance with two vectors", func() {
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := chebyshev.Distance(vectorX, vectorY)

			Convey("The result should be 8", func() {
				So(result, ShouldEqual, 8)
			})
		})

		Convey("When calculating distance with different dimention matrices", func() {
			vectorX.TCopy(vectorX)
			So(func() { chebyshev.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
		})

	})
}
Exemple #3
0
func TestCranberrra(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	cranberra := NewCranberra()

	Convey("Given two vectors that are same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := cranberra.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(5, 1, []float64{1, 2, 3, 4, 9})
		vectorY = mat64.NewDense(5, 1, []float64{-5, -6, 7, 4, 3})

		Convey("When calculating distance with two vectors", func() {
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := cranberra.Distance(vectorX, vectorY)

			Convey("The result should be 2.9", func() {
				So(result, ShouldEqual, 2.9)
			})
		})

		Convey("When calculating distance with different dimention matrices", func() {
			vectorX.TCopy(vectorX)
			So(func() { cranberra.Distance(vectorX, vectorY) }, ShouldPanic)
		})

	})
}
Exemple #4
0
func TestManhattan(t *testing.T) {
	var vectorX, vectorY *mat64.Dense
	manhattan := NewManhattan()

	Convey("Given two vectors that are same", t, func() {
		vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89})
		distance := manhattan.Distance(vec, vec)

		Convey("The result should be 0", func() {
			So(distance, ShouldEqual, 0)
		})
	})

	Convey("Given two vectors", t, func() {
		vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
		vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})

		Convey("When calculating distance with column vectors", func() {
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with row vectors", func() {
			vectorX.TCopy(vectorX)
			vectorY.TCopy(vectorY)
			result := manhattan.Distance(vectorX, vectorY)

			Convey("The result should be 5", func() {
				So(result, ShouldEqual, 5)
			})
		})

		Convey("When calculating distance with different dimention matrices", func() {
			vectorX.TCopy(vectorX)
			So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
		})

	})
}
Exemple #5
0
// CovarianceMatrix computes the covariance matrix (also called the
// variance-covariance matrix) of the columns of the data matrix x with a
// two-pass algorithm: each column is first centered on its (weighted) mean,
// then the centered data is multiplied by its own transpose and scaled.
// The result is square, with one row and column per column of x.
//
// wts holds one weight per row of x and may be nil for unweighted data.
// Weights must not be negative; a negative weight causes a panic.
//
// If cov is nil a new matrix of the appropriate size is allocated. Otherwise
// cov must be square with as many columns as x, and it is used as the
// receiver for the result; any other shape panics with mat64.ErrShape.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// Matrix form of the two-pass algorithm. Unlike the Covariance function
	// it applies no extra floating point correction for the rounding
	// introduced by centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and
	// change the returned type from a *mat64.Dense to a symmetric matrix type.

	rows, cols := x.Dims()

	if cov != nil {
		if cr, cc := cov.Dims(); cr != cc || cc != cols {
			panic(mat64.ErrShape)
		}
	} else {
		cov = mat64.NewDense(cols, cols, nil)
	}

	// Work on the transpose so that every column of x is one contiguous row.
	var centered mat64.Dense
	centered.TCopy(x)
	for i := 0; i < cols; i++ {
		series := centered.RawRowView(i)
		// Mean is expected to panic when len(wts) differs from len(series),
		// so no separate length check is made here.
		floats.AddConst(-Mean(series, wts), series)
	}

	// Normalization factor: the row count when unweighted, otherwise the
	// sum of the weights.
	total := float64(rows)
	if wts != nil {
		// Scale by the square root of each weight so that the product of
		// the data with its own transpose stays symmetric.
		root := make([]float64, rows)
		for i, w := range wts {
			if w < 0 {
				panic("stat: negative covariance matrix weights")
			}
			root[i] = math.Sqrt(w)
		}
		for i := 0; i < cols; i++ {
			floats.Mul(centered.RawRowView(i), root)
		}
		total = floats.Sum(wts)
	}

	cov.MulTrans(&centered, false, &centered, true)

	// Scale by the sample size.
	cov.Scale(1/(total-1), cov)
	return cov
}
// Fit estimates the intercept and the slope coefficients of the model from
// inst. Only non-class Attributes of type *base.FloatAttribute are used as
// explanatory variables. The design matrix is factored with a QR
// decomposition and the triangular system is solved by back substitution.
//
// It returns an error when inst does not have exactly one class Attribute,
// NotEnoughDataError when there are fewer rows than design-matrix columns,
// and any error reported by inst.MapOverRows while the matrices are built.
// The receiver is only modified when fitting succeeds.
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {

	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes: only float-valued ones are kept.
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0, len(allAttrs))
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	// One column per retained Attribute plus one for the intercept.
	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix. A failed iteration would leave it only
	// partially filled, so the error is propagated instead of ignored.
	if err := inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	}); err != nil {
		return err
	}

	// Build the explanatory variables, propagating iteration errors likewise.
	if err := inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	}); err != nil {
		return err
	}

	n := cols
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	// qty = transpose(Q) * y, the right-hand side of the triangular system.
	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back substitution: solve R * coefficients = qty from the last row up.
	// NOTE: a zero diagonal entry in R is not checked for and would yield
	// Inf/NaN coefficients through the division below.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	// Element 0 belongs to the intercept column; the rest line up with attrs.
	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}