// VarianceCovarianceMatrix calculates the variance-covariance matrix of the
// regression coefficients, defined as sigma^2 * (X'X)^-1.
// Using the QR decomposition X = QR:
//   (X'X)^-1 = ((QR)'QR)^-1 = (R'Q'QR)^-1 = (R'R)^-1 = R^-1 R'^-1
// so the result is sigma^2 * R^-1 R'^-1.
func (o *OLS) VarianceCovarianceMatrix() *mat64.Dense {
	x := mat64.DenseCopyOf(o.x.data)
	_, p := x.Dims()

	// it's easier to do things with X = QR
	qrFactor := mat64.QR(x)
	R := qrFactor.R()
	Rt := R.T()

	RtInv, err := mat64.Inverse(Rt)
	if err != nil {
		panic("Rt is not invertible")
	}
	Rinverse, err := mat64.Inverse(R)
	if err != nil {
		panic("R matrix is not invertible")
	}

	varCov := mat64.NewDense(p, p, nil)
	varCov.Mul(Rinverse, RtInv)

	// multiply each element by the MSE (the estimate of sigma^2)
	mse := o.MeanSquaredError()
	mulEach := func(r, c int, v float64) float64 { return v * mse }
	varCov.Apply(mulEach, varCov)

	return varCov
}
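// A natural companion, sketched here rather than taken from the original
// source: the coefficient standard errors are the square roots of the
// diagonal of sigma^2 * (X'X)^-1. Assumes the OLS type and
// VarianceCovarianceMatrix above, plus "math" imported; stdErrors is a
// hypothetical helper name.
func (o *OLS) stdErrors() []float64 {
	varCov := o.VarianceCovarianceMatrix()
	p, _ := varCov.Dims()
	se := make([]float64, p)
	for i := 0; i < p; i++ {
		se[i] = math.Sqrt(varCov.At(i, i))
	}
	return se
}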
func (lr *LinearRegression) Fit(inst *base.Instances) error {
	if inst.Rows < inst.GetAttributeCount() {
		return NotEnoughDataError
	}

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(inst.Rows, 1, nil)
	explVariables := mat64.NewDense(inst.Rows, inst.GetAttributeCount(), nil)

	for i := 0; i < inst.Rows; i++ {
		observed.Set(i, 0, inst.Get(i, inst.ClassIndex)) // Set observed data
		for j := 0; j < inst.GetAttributeCount(); j++ {
			if j == 0 {
				// Set intercepts to 1.0
				// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
				explVariables.Set(i, 0, 1.0)
			} else {
				explVariables.Set(i, j, inst.Get(i, j-1))
			}
		}
	}

	n := inst.GetAttributeCount()
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back substitution on the upper-triangular system R b = Q'y
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	return nil
}
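// The coefficient loop above is plain back substitution on the
// upper-triangular system R b = Q'y. A self-contained sketch of the same
// idea on a made-up 3x3 system (illustrative values only, no mat64 needed):
package main

import "fmt"

// backSubstitute solves Rb = c for upper-triangular R, mirroring the
// coefficient loop in Fit above.
func backSubstitute(R [][]float64, c []float64) []float64 {
	n := len(c)
	b := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		b[i] = c[i]
		for j := i + 1; j < n; j++ {
			b[i] -= b[j] * R[i][j]
		}
		b[i] /= R[i][i]
	}
	return b
}

func main() {
	R := [][]float64{
		{2, 1, 0},
		{0, 3, 1},
		{0, 0, 4},
	}
	c := []float64{5, 10, 8}
	// b[2] = 8/4 = 2, b[1] = (10-2)/3 = 8/3, b[0] = (5-8/3)/2 = 7/6
	fmt.Println(backSubstitute(R, c))
}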
func (o *OLS) Train(yvector []float64) error {
	// sanity check
	if len(yvector) != o.n {
		return DimensionError
	}
	copy(o.response, yvector)

	y := mat64.NewDense(len(yvector), 1, yvector)
	o.x.PushCol(rep(1.0, o.x.rows))
	x := o.x.data

	// it's easier to do things with X = QR
	qrFactor := mat64.QR(mat64.DenseCopyOf(x))
	Q := qrFactor.Q()
	betas := qrFactor.Solve(mat64.DenseCopyOf(y))
	o.betas = betas.Col(nil, 0)
	if len(o.betas) != o.p {
		log.Printf("Unexpected dimension error. Betas: %v", o.betas)
	}

	// calculate residuals and fitted values
	/*
		fitted := &mat64.Dense{}
		fitted.Mul(x, betas)
		o.fitted = fitted.Col(nil, 0)

		y.Sub(y, fitted)
		o.residuals = y.Col(nil, 0)
	*/

	// y_hat = Q Q' y
	// e = y - y_hat
	qqt := &mat64.Dense{}
	qqt.Mul(Q, Q.T())
	yhat := &mat64.Dense{}
	yhat.Mul(qqt, y)
	o.fitted = yhat.Col(nil, 0)

	y.Sub(y, yhat)
	o.residuals = y.Col(nil, 0)
	return nil
}
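// Sanity-check sketch (not from the original source): since QQ' projects
// y onto the column space of X, the residuals e = y - y_hat should be
// orthogonal to the fitted values up to floating-point error. Field names
// follow the OLS snippet above; residualFittedDot is a hypothetical helper.
func (o *OLS) residualFittedDot() float64 {
	var dot float64
	for i := range o.residuals {
		dot += o.residuals[i] * o.fitted[i]
	}
	return dot // expect ~0 after Train
}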
// LeveragePoints returns the leverage points: the diagonal of the hat matrix.
//   H = X(X'X)^-1 X',  with X = QR and X' = R'Q'
//     = QR(R'Q'QR)^-1 R'Q'
//     = QR(R'R)^-1 R'Q'
//     = QR R^-1 R'^-1 R'Q'
//     = QQ'  (using the first p columns of Q, where X is n x p)
//
// A leverage point is considered large if it exceeds 2p/n.
func (o *OLS) LeveragePoints() []float64 {
	x := mat64.DenseCopyOf(o.x.data)
	qrf := mat64.QR(x)
	q := qrf.Q()
	H := &mat64.Dense{}
	H.Mul(q, q.T())
	o.hat = H

	// get the diagonal elements
	n, _ := q.Dims()
	diag := make([]float64, n)
	for i := 0; i < n; i++ {
		diag[i] = H.At(i, i)
	}
	return diag
}
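// Applying the 2p/n rule of thumb from the comment above; a sketch only
// (flagHighLeverage is a hypothetical helper, and p is the column count
// of X). As a sanity check, the diagonal of a projection matrix sums to
// its rank, so the returned leverages should sum to p.
func flagHighLeverage(diag []float64, p int) []int {
	n := len(diag)
	cutoff := 2.0 * float64(p) / float64(n)
	var flagged []int
	for i, h := range diag {
		if h > cutoff {
			flagged = append(flagged, i)
		}
	}
	return flagged
}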
func main() {
	x, y := givens()
	fmt.Printf("%.4f\n", mat64.Formatted(mat64.QR(x).Solve(y)))
}
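// givens is not shown above; presumably it returns a design matrix and a
// response as *mat64.Dense values. A hypothetical stand-in with made-up
// data, just to make the example self-contained: with an intercept column
// and one regressor, QR(x).Solve(y) returns the least-squares coefficients.
func givens() (*mat64.Dense, *mat64.Dense) {
	x := mat64.NewDense(3, 2, []float64{
		1, 0,
		1, 1,
		1, 2,
	})
	y := mat64.NewDense(3, 1, []float64{1, 3, 5})
	return x, y
}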
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1
	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back substitution on the upper-triangular system R b = Q'y
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
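// Usage sketch (not from the original source): exercising Fit from within
// the same package. base.ParseCSVToInstances is golearn's CSV loader; the
// "train.csv" path, the zero-value construction of LinearRegression, and
// printing the unexported fields are assumptions for illustration only.
func exampleFit() error {
	inst, err := base.ParseCSVToInstances("train.csv", true)
	if err != nil {
		return err
	}
	lr := &LinearRegression{}
	if err := lr.Fit(inst); err != nil {
		return err
	}
	fmt.Printf("intercept=%.4f coefficients=%v\n",
		lr.disturbance, lr.regressionCoefficients)
	return nil
}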