// Predictions returns the prediction for each row of the 'data' passed in.
// It makes a prediction by calling lr.Predict on each row of the data.
// If it fails to make a prediction it arbitrarily sets the result to 0.
//
func (lr *LinearRegression) Predictions(data [][]float64) ([]float64, error) {
	var err error
	var predictions []float64
	for i := 0; i < len(data); i++ {
		x := []float64{}
		// append x0
		x = append(x, 1)
		x = append(x, data[i]...)
		if lr.HasTransform {
			if x, err = lr.TransformFunction(x); err != nil {
				return nil, err
			}
		}
		gi, err := lr.Predict(x)
		if err != nil {
			predictions = append(predictions, 0)
			continue
		}
		if ml.Sign(gi) == float64(1) {
			predictions = append(predictions, 1)
		} else {
			predictions = append(predictions, 0)
		}
	}
	return predictions, nil
}
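// The function below is a usage sketch, not part of the original package: it
// assumes a trained *LinearRegression 'lr' and illustrative sample rows. Rows
// are passed without the leading x0 coordinate, which Predictions prepends.
func examplePredictions(lr *LinearRegression) {
	rows := [][]float64{{0.3, -0.2}, {1.1, 0.7}}
	preds, err := lr.Predictions(rows)
	if err != nil {
		log.Fatalln(err)
	}
	// Each entry is 1 when ml.Sign of the prediction is 1, and 0 otherwise.
	fmt.Println(preds)
}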
// CompareInSample returns the fraction of in sample points on which the
// hypothesis learned by the linear regression disagrees with the target
// function 'f'.
//
func (lr *LinearRegression) CompareInSample(f linear.Function) float64 {
	gInSample := make([]float64, len(lr.Xn))
	fInSample := make([]float64, len(lr.Xn))
	for i := 0; i < len(lr.Xn); i++ {
		gi, err := lr.Predict(lr.Xn[i])
		if err != nil {
			// force difference because of error
			gInSample[i] = 0
			fInSample[i] = f(lr.Xn[i][1:])
			continue
		}
		gInSample[i] = ml.Sign(gi)
		fInSample[i] = f(lr.Xn[i][1:])
	}
	// measure difference:
	diff := 0
	for i := 0; i < len(lr.Xn); i++ {
		if gInSample[i] != fInSample[i] {
			diff++
		}
	}
	return float64(diff) / float64(len(lr.Xn))
}
// CompareOutOfSample returns the fraction of out of sample points on which the
// hypothesis learned by the linear regression disagrees with 'f', the linear
// function passed as parameter. The comparison is made on out of sample points
// generated randomly in the defined interval.
//
func (lr *LinearRegression) CompareOutOfSample(f linear.Function) float64 {
	outOfSample := 1000
	diff := 0
	for i := 0; i < outOfSample; i++ {
		//var oY int
		oX := make([]float64, lr.VectorSize)
		oX[0] = float64(1)
		for j := 1; j < len(oX); j++ {
			oX[j] = lr.Interval.RandFloat()
		}
		gi, err := lr.Predict(oX)
		if err != nil {
			diff++
			continue
		}
		if ml.Sign(gi) != f(oX[1:]) {
			diff++
		}
	}
	return float64(diff) / float64(outOfSample)
}
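// A minimal sketch of how the two Compare helpers can be read together; the
// trained model 'lr' and the target 'target' are assumptions of the sketch.
// Both return the fraction of points on which hypothesis and target disagree.
func exampleCompare(lr *LinearRegression, target linear.Function) {
	fmt.Println("in sample disagreement:    ", lr.CompareInSample(target))
	fmt.Println("out of sample disagreement:", lr.CompareOutOfSample(target))
}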
// Ein returns the in sample error of the current linear regression model.
// It is the fraction of in sample points which got misclassified.
// todo(santiaago): change this to gi = d[i]*Yn
//
func (lr *LinearRegression) Ein() float64 {
	if lr.ComputedEin {
		return lr.ein
	}
	// XnWn
	gInSample := make([]float64, len(lr.Xn))
	for i := 0; i < len(lr.Xn); i++ {
		gi, err := lr.Predict(lr.Xn[i])
		if err != nil {
			continue
		}
		gInSample[i] = ml.Sign(gi)
	}
	nEin := 0
	for i := 0; i < len(gInSample); i++ {
		if gInSample[i] != lr.Yn[i] {
			nEin++
		}
	}
	ein := float64(nEin) / float64(len(gInSample))
	lr.ComputedEin = true
	lr.ein = ein
	return ein
}
// Eout returns the out of sample error.
// It is the fraction of out of sample points which got misclassified.
// It generates 1000 out of sample points and classifies them.
//
func (lr *LinearRegression) Eout() float64 {
	outOfSample := 1000
	numError := 0
	for i := 0; i < outOfSample; i++ {
		oX := make([]float64, lr.VectorSize)
		oX[0] = 1
		for j := 1; j < len(oX); j++ {
			oX[j] = lr.Interval.RandFloat()
		}
		// output with potential noise in 'flip' variable
		oY := evaluate(lr.TargetFunction, oX) * lr.flip()
		gi, err := lr.Predict(oX)
		if err != nil {
			numError++
			continue
		}
		if ml.Sign(gi) != oY {
			numError++
		}
	}
	return float64(numError) / float64(outOfSample)
}
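// A minimal sketch showing how Ein and Eout are typically read off a model
// that has already been trained; the model 'lr' is an assumption here.
func exampleInOutErrors(lr *LinearRegression) {
	fmt.Println("in sample error:    ", lr.Ein())
	fmt.Println("out of sample error:", lr.Eout())
}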
// EAugIn is the fraction of "in sample points" which got misclassified plus the term
// lambda / N * Sum(Wi^2).
// todo(santiaago): change this to use vector vector.
//
func (lr *LogisticRegression) EAugIn() float64 {
	gInSample := make([]float64, len(lr.Xn))
	for i := 0; i < len(lr.Xn); i++ {
		gi := float64(0)
		for j := 0; j < len(lr.Xn[0]); j++ {
			gi += lr.Xn[i][j] * lr.WReg[j]
		}
		gInSample[i] = ml.Sign(gi)
	}
	nEin := 0
	for i := 0; i < len(gInSample); i++ {
		if gInSample[i] != lr.Yn[i] {
			nEin++
		}
	}
	wi2, err := ml.Vector(lr.WReg).Dot(lr.WReg)
	if err != nil {
		log.Printf("skipping regularizer step due to %v", err)
		wi2 = 1
	}
	reg := (lr.Lambda / float64(len(lr.WReg))) * wi2
	return float64(nEin)/float64(len(gInSample)) + reg
}
// EoutFromFile returns the out of sample error computed from the data provided in the file.
// It only supports linear regressions with transformed data.
// todo(santiaago): make this more generic.
//
func (lr *LinearRegression) EoutFromFile(filename string) (float64, error) {
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	numError := 0
	n := 0
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		split := strings.Split(scanner.Text(), " ")
		var line []string
		for _, s := range split {
			cell := strings.Replace(s, " ", "", -1)
			if len(cell) > 0 {
				line = append(line, cell)
			}
		}
		var oX1, oX2, oY float64
		if oX1, err = strconv.ParseFloat(line[0], 64); err != nil {
			return 0, err
		}
		if oX2, err = strconv.ParseFloat(line[1], 64); err != nil {
			return 0, err
		}
		if oY, err = strconv.ParseFloat(line[2], 64); err != nil {
			return 0, err
		}
		oX, err := lr.TransformFunction([]float64{1, oX1, oX2})
		if err != nil {
			numError++
			n++
			continue
		}
		gi, err := lr.Predict(oX)
		if err != nil {
			numError++
			n++
			continue
		}
		if ml.Sign(gi) != oY {
			numError++
		}
		n++
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return float64(numError) / float64(n), nil
}
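// EoutFromFile expects each line of the file to hold "x1 x2 y" separated by
// spaces. The sketch below illustrates the call; the file name is a
// placeholder, not a file shipped with the package.
func exampleEoutFromFile(lr *LinearRegression) {
	eout, err := lr.EoutFromFile("out.dta")
	if err != nil {
		log.Fatalln(err)
	}
	fmt.Println("Eout from file:", eout)
}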
// Ecv returns the leave one out cross validation
// in sample error of the current logistic regression model.
//
func (lr *LogisticRegression) Ecv() float64 {
	if lr.ComputedEcv {
		return lr.ecv
	}
	trainingPoints := lr.TrainingPoints
	x := lr.Xn
	y := lr.Yn
	nEcv := 0
	for out := range lr.Xn {
		fmt.Printf("\rLeave %v out of %v", out, len(lr.Xn))
		outx, outy := lr.Xn[out], lr.Yn[out]

		nlr := NewLogisticRegression()
		nlr.TrainingPoints = lr.TrainingPoints - 1
		nlr.Wn = make([]float64, lr.VectorSize)
		nlr.VectorSize = lr.VectorSize
		nlr.Xn = [][]float64{}
		nlr.Yn = []float64{}
		for i := range x {
			if i == out {
				continue
			}
			nlr.Xn = append(nlr.Xn, x[i])
			nlr.Yn = append(nlr.Yn, y[i])
		}

		if nlr.IsRegularized {
			if err := nlr.LearnRegularized(); err != nil {
				log.Println("LearnRegularized error", err)
				trainingPoints--
				continue
			}
			nlr.Wn = nlr.WReg
		} else {
			if err := nlr.Learn(); err != nil {
				log.Println("Learn error", err)
				trainingPoints--
				continue
			}
		}
		gi, err := nlr.Predict(outx)
		if err != nil {
			nEcv++
			continue
		}
		if ml.Sign(gi) != outy {
			nEcv++
		}
	}
	ecv := float64(nEcv) / float64(trainingPoints)
	lr.ComputedEcv = true
	lr.ecv = ecv
	return ecv
}
// Ecv returns the leave one out cross validation
// in sample error of the current linear regression model.
//
func (lr *LinearRegression) Ecv() float64 {
	if lr.ComputedEcv {
		return lr.ecv
	}
	trainingPoints := lr.TrainingPoints
	x := lr.Xn
	y := lr.Yn
	nEcv := 0
	for out := range lr.Xn {
		fmt.Printf("\rLeave %v out of %v", out, len(lr.Xn))
		outx, outy := x[out], y[out]

		nlr := NewLinearRegression()
		*nlr = *lr
		nlr.ComputedEcv = false
		nlr.TrainingPoints = lr.TrainingPoints - 1
		nlr.Xn = [][]float64{}
		nlr.Yn = []float64{}
		for i := range x {
			if i == out {
				continue
			}
			nlr.Xn = append(nlr.Xn, x[i])
			nlr.Yn = append(nlr.Yn, y[i])
		}

		if lr.IsRegularized {
			if err := nlr.LearnWeightDecay(); err != nil {
				log.Println("LearnWeightDecay error", err)
				trainingPoints--
				continue
			}
			nlr.Wn = nlr.WReg
		} else {
			if err := nlr.Learn(); err != nil {
				log.Println("Learn error", err)
				trainingPoints--
				continue
			}
		}
		gi, err := nlr.Predict(outx)
		if err != nil {
			log.Println("Predict error", err)
			trainingPoints--
			continue
		}
		if ml.Sign(gi) != outy {
			nEcv++
		}
	}
	ecv := float64(nEcv) / float64(trainingPoints)
	lr.ComputedEcv = true
	lr.ecv = ecv
	return ecv
}
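// Ecv retrains the model once per training point, so it is expensive. The
// sketch below, with an assumed trained model 'lr', shows that a second call
// is cheap because the result is cached in lr.ecv via lr.ComputedEcv.
func exampleEcv(lr *LinearRegression) {
	first := lr.Ecv()  // runs the leave one out loop
	second := lr.Ecv() // returns the cached value
	fmt.Println(first, second)
}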
// EAugOutFromFile returns the augmented error from an out of sample file.
//
func (lr *LinearRegression) EAugOutFromFile(filename string) (float64, error) {
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	numError := 0
	n := 0
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		split := strings.Split(scanner.Text(), " ")
		var line []string
		for _, s := range split {
			cell := strings.Replace(s, " ", "", -1)
			if len(cell) > 0 {
				line = append(line, cell)
			}
		}
		var oX1, oX2, oY float64
		if oX1, err = strconv.ParseFloat(line[0], 64); err != nil {
			return 0, err
		}
		if oX2, err = strconv.ParseFloat(line[1], 64); err != nil {
			return 0, err
		}
		var oX []float64
		if oX, err = lr.TransformFunction([]float64{1, oX1, oX2}); err != nil {
			return 0, err
		}
		if oY, err = strconv.ParseFloat(line[2], 64); err != nil {
			return 0, err
		}
		gi := float64(0)
		for j := 0; j < len(oX); j++ {
			gi += oX[j] * lr.WReg[j]
		}
		if ml.Sign(gi) != oY {
			numError++
		}
		n++
	}
	return float64(numError) / float64(n), nil
}
// Ecv returns the leave one out cross validation
// in sample error of the current svm model.
//
func (svm *SVM) Ecv() float64 {
	trainingPoints := svm.TrainingPoints
	x := svm.Xn
	y := svm.Yn
	nEcv := 0
	for out := range svm.Xn {
		fmt.Printf("\rLeave %v out of %v", out, len(svm.Xn))
		outx, outy := x[out], y[out]

		nsvm := NewSVM()
		*nsvm = *svm
		nsvm.TrainingPoints = svm.TrainingPoints - 1
		nsvm.Xn = [][]float64{}
		nsvm.Yn = []float64{}
		for i := range x {
			if i == out {
				continue
			}
			nsvm.Xn = append(nsvm.Xn, x[i])
			nsvm.Yn = append(nsvm.Yn, y[i])
		}

		if err := nsvm.Learn(); err != nil {
			log.Println("Learn error", err)
			trainingPoints--
			continue
		}
		gi, err := nsvm.Predict(outx)
		if err != nil {
			log.Println("Predict error", err)
			trainingPoints--
			continue
		}
		if ml.Sign(gi) != outy {
			nEcv++
		}
	}
	ecv := float64(nEcv) / float64(trainingPoints)
	return ecv
}
// EValIn returns the in sample error of the Validation points.
// It is the fraction of misclassified points present in the Validation set XVal.
//
func (lr *LinearRegression) EValIn() float64 {
	gInSample := make([]float64, len(lr.XVal))
	for i := 0; i < len(lr.XVal); i++ {
		gi, err := lr.Predict(lr.XVal[i])
		if err != nil {
			continue
		}
		gInSample[i] = ml.Sign(gi)
	}
	nEin := 0
	for i := 0; i < len(gInSample); i++ {
		if gInSample[i] != lr.YVal[i] {
			nEin++
		}
	}
	return float64(nEin) / float64(len(gInSample))
}
// EAugIn is the fraction of "in sample points" which got misclassified.
// Note: the term lambda / N * Sum(Wi^2) is not added yet, see todo below.
// todo(santiaago): change this to use vector vector.
// todo(santiaago): add term lambda / N * Sum(Wi^2)
//
func (lr *LinearRegression) EAugIn() float64 {
	gInSample := make([]float64, len(lr.Xn))
	for i := 0; i < len(lr.Xn); i++ {
		gi := float64(0)
		for j := 0; j < len(lr.Xn[0]); j++ {
			gi += lr.Xn[i][j] * lr.WReg[j]
		}
		gInSample[i] = ml.Sign(gi)
	}
	nEin := 0
	for i := 0; i < len(gInSample); i++ {
		if gInSample[i] != lr.Yn[i] {
			nEin++
		}
	}
	return float64(nEin) / float64(len(gInSample))
}
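// A sketch of the regularization term mentioned in the todo above, mirroring
// the logistic regression version of EAugIn. It assumes the linear regression
// exposes a Lambda field like the logistic regression does; treat it as an
// illustration rather than the final implementation.
func exampleWeightDecayTerm(lr *LinearRegression) float64 {
	// Wi^2 summed over the regularized weight vector.
	wi2, err := ml.Vector(lr.WReg).Dot(lr.WReg)
	if err != nil {
		return 0
	}
	return (lr.Lambda / float64(len(lr.WReg))) * wi2
}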
// Ein returns the in sample error of the current svm model.
// It is the fraction of in sample points which got misclassified.
//
func (svm *SVM) Ein() float64 {
	// XnWn
	gInSample := make([]float64, len(svm.Xn))
	for i := 0; i < len(svm.Xn); i++ {
		gi, err := svm.Predict(svm.Xn[i])
		if err != nil {
			continue
		}
		gInSample[i] = ml.Sign(gi)
	}
	nEin := 0
	for i := 0; i < len(gInSample); i++ {
		if gInSample[i] != svm.Yn[i] {
			nEin++
		}
	}
	ein := float64(nEin) / float64(len(gInSample))
	return ein
}
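// A minimal sketch for the SVM error helpers; 'svm' is assumed to have been
// built and trained already (for instance via svm.Learn).
func exampleSVMErrors(svm *SVM) {
	fmt.Println("svm Ein:", svm.Ein())
	fmt.Println("svm Ecv:", svm.Ecv())
}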
// e returns the sign of a second order polynomial in x1 and x2.
func e(x ...float64) float64 {
	x1 := x[0]
	x2 := x[1]
	return ml.Sign(-1 - 0.05*x1 + 0.08*x2 + 1.5*x1*x2 + 0.15*x1*x1 + 0.15*x2*x2)
}
// f is a non linear transformation: the sign of x1*x1 + x2*x2 - 0.6.
func f(x ...float64) float64 {
	x1 := x[0]
	x2 := x[1]
	return ml.Sign(x1*x1 + x2*x2 - 0.6)
}
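// A quick, illustrative check of the non linear function above: points inside
// the circle of radius sqrt(0.6) give a negative argument to ml.Sign, points
// outside give a positive one. The sample inputs are arbitrary.
func exampleTargetF() {
	fmt.Println(f(0.1, 0.1)) // 0.01 + 0.01 - 0.6 < 0
	fmt.Println(f(1.0, 1.0)) // 1 + 1 - 0.6 > 0
}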