Example #1
0
// VarianceCovarianceMatrix returns the variance-covariance matrix of the
// regression coefficients, i.e. mse * (XtX)^-1.
//
// The inverse is derived from the QR factorization X = QR. Because QtQ = I:
//
//	(XtX)^-1 = (RtQtQR)^-1 = (RtR)^-1 = R^-1 * (Rt)^-1
//
// so only R and its transpose need to be inverted.
//
// It panics if either R or Rt cannot be inverted.
func (o *OLS) VarianceCovarianceMatrix() *mat64.Dense {
	// Work on a copy of the stored design matrix.
	design := mat64.DenseCopyOf(o.x.data)
	_, ncoef := design.Dims()

	upper := mat64.QR(design).R()

	upperTInv, err := mat64.Inverse(upper.T())
	if err != nil {
		panic("Rt is not invertible")
	}

	upperInv, err := mat64.Inverse(upper)
	if err != nil {
		panic("R matrix is not invertible")
	}

	// R^-1 * (Rt)^-1 == (XtX)^-1
	cov := mat64.NewDense(ncoef, ncoef, nil)
	cov.Mul(upperInv, upperTInv)

	// Multiply every element by the mean squared error.
	sigma := o.MeanSquaredError()
	cov.Apply(func(_, _ int, v float64) float64 { return v * sigma }, cov)

	return cov
}
Example #2
0
// Fit estimates the intercept and the regression coefficients from inst by
// least squares, solving X*b = y through the QR factorization of X.
//
// It returns NotEnoughDataError when inst has fewer rows than attributes.
// On success the intercept is stored in lr.disturbance, the remaining
// coefficients in lr.regressionCoefficients, and lr.fitted is set.
//
// NOTE(review): predictors are read from attribute columns 0..count-2
// regardless of inst.ClassIndex, so this presumably expects the class
// attribute to be the last column — confirm against callers.
func (lr *LinearRegression) Fit(inst *base.Instances) error {
	cols := inst.GetAttributeCount()
	if inst.Rows < cols {
		return NotEnoughDataError
	}

	// y holds the observed (dependent) values; design holds the explanatory
	// variables, see http://en.wikipedia.org/wiki/Linear_regression
	y := mat64.NewDense(inst.Rows, 1, nil)
	design := mat64.NewDense(inst.Rows, cols, nil)

	for row := 0; row < inst.Rows; row++ {
		y.Set(row, 0, inst.Get(row, inst.ClassIndex))

		for col := 0; col < cols; col++ {
			// Column 0 is the intercept term, fixed at 1.0.
			// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
			val := 1.0
			if col > 0 {
				val = inst.Get(row, col-1)
			}
			design.Set(row, col, val)
		}
	}

	factor := mat64.QR(design)
	q := factor.Q()
	r := factor.R()

	// qty = Qt * y
	var qT, qty mat64.Dense
	qT.TCopy(q)
	qty.Mul(&qT, y)

	// Back substitution on the upper-triangular system R*b = Qt*y,
	// working from the last coefficient to the first.
	beta := make([]float64, cols)
	for i := cols - 1; i >= 0; i-- {
		acc := qty.At(i, 0)
		for j := i + 1; j < cols; j++ {
			acc -= beta[j] * r.At(i, j)
		}
		beta[i] = acc / r.At(i, i)
	}

	lr.disturbance = beta[0]
	lr.regressionCoefficients = beta[1:]
	lr.fitted = true

	return nil
}
Example #3
0
// Train fits the model to the response vector yvector.
//
// It appends a column of ones to the stored design matrix, factors the
// result as X = QR, and stores the coefficients in o.betas, the fitted
// values in o.fitted and the residuals in o.residuals. A copy of yvector is
// written into o.response.
//
// It returns DimensionError when len(yvector) != o.n. yvector itself is not
// modified.
//
// NOTE(review): every call pushes another column onto o.x, so calling Train
// more than once on the same OLS presumably adds duplicate intercept
// columns — confirm against PushCol and the callers.
func (o *OLS) Train(yvector []float64) error {
	// sanity check
	if len(yvector) != o.n {
		return DimensionError
	}

	copy(o.response, yvector)

	// NewDense uses the slice it is handed as backing storage, so give it a
	// private copy; otherwise the in-place y.Sub below would overwrite the
	// caller's yvector with the residuals.
	yData := make([]float64, len(yvector))
	copy(yData, yvector)
	y := mat64.NewDense(len(yData), 1, yData)

	o.x.PushCol(rep(1.0, o.x.rows))

	// it's easier to do things with X = QR
	qrFactor := mat64.QR(mat64.DenseCopyOf(o.x.data))
	Q := qrFactor.Q()

	betas := qrFactor.Solve(mat64.DenseCopyOf(y))
	o.betas = betas.Col(nil, 0)
	if len(o.betas) != o.p {
		log.Printf("Unexpected dimension error. Betas: %v", o.betas)
	}

	// y_hat = Q Qt y, evaluated as Q (Qt y) so that the n x n product Q Qt
	// is never materialised.
	qty := &mat64.Dense{}
	qty.Mul(Q.T(), y)
	yhat := &mat64.Dense{}
	yhat.Mul(Q, qty)
	o.fitted = yhat.Col(nil, 0)

	// e = y - y_hat (y is our private copy, so subtracting in place is safe)
	y.Sub(y, yhat)
	o.residuals = y.Col(nil, 0)

	return nil
}
Example #4
0
// LeveragePoints returns the leverage of every observation, i.e. the
// diagonal of the hat matrix H, and stores H in o.hat.
//
// With the QR factorization X = QR (so X' = R'Q' and Q'Q = I):
//
//	H = X (X'X)^-1 X'
//	  = QR (R'Q'QR)^-1 R'Q'
//	  = QR (R'R)^-1 R'Q'
//	  = QR R^-1 R'^-1 R'Q'
//	  = QQ'   (the first p cols of Q, where X = n x p)
//
// Leverage points are considered large if they exceed 2p/n.
func (o *OLS) LeveragePoints() []float64 {
	x := mat64.DenseCopyOf(o.x.data)
	q := mat64.QR(x).Q()

	H := &mat64.Dense{}
	H.Mul(q, q.T())
	o.hat = H

	// Read the diagonal directly instead of scanning every column of H.
	n, _ := q.Dims()
	diag := make([]float64, n)
	for i := range diag {
		diag[i] = H.At(i, i)
	}
	return diag
}
// main takes the pair of matrices returned by givens, solves the system
// through a QR factorization of the first one, and prints the solution with
// four decimal places.
func main() {
	a, b := givens()
	solution := mat64.QR(a).Solve(b)
	fmt.Printf("%.4f\n", mat64.Formatted(solution))
}
Example #6
0
// Fit estimates the intercept and the regression coefficients from inst by
// least squares, solving X*b = y through the QR factorization of X.
//
// Exactly one class attribute is required; it supplies the observed values.
// Of the non-class attributes only those of type *base.FloatAttribute are
// used as explanatory variables. A leading column of ones is added for the
// intercept.
//
// It returns an error when inst does not have exactly one class attribute,
// NotEnoughDataError when there are fewer rows than columns in X, any error
// reported while iterating over the rows, and an error when R has a zero on
// its diagonal (in which case the coefficients cannot be solved for).
//
// On success it stores the intercept in lr.disturbance, the remaining
// coefficients in lr.regressionCoefficients, the attributes used in
// lr.attrs and the class attribute in lr.cls, and sets lr.fitted. lr is left
// untouched on failure.
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {

	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes: only float-valued ones are kept.
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0, len(allAttrs))
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	// One column per retained attribute plus one for the intercept.
	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix. A failure while iterating would otherwise
	// leave zero-filled rows behind, so it is reported to the caller.
	err := inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})
	if err != nil {
		return err
	}

	// Build the explanatory variables
	err = inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})
	if err != nil {
		return err
	}

	n := cols
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	// qty = Qt * y
	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back substitution on the upper-triangular system R*b = Qt*y,
	// working from the last coefficient to the first.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		pivot := reg.At(i, i)
		if pivot == 0 {
			// Dividing would produce NaN/Inf coefficients and still mark
			// the model as fitted.
			return fmt.Errorf("explanatory variables are rank deficient: zero pivot in row %d of R", i)
		}
		sum := qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			sum -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] = sum / pivot
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}