// TestCSV func main() { var reader io.Reader var err error if reader, err = os.Open("pima-indians-diabetes.csv"); err != nil { log.Fatalf("%v", err) } handler := &Handler{} err = db.Import(reader, handler) targetBool := make([]bool, handler.target.Len()) targetNumeric := make([]float64, handler.target.Len()) for i := 0; i < handler.target.Len(); i++ { targetBool[i] = handler.target.NumericValue(i) == 1.0 targetNumeric[i] = handler.target.NumericValue(i) } if err != nil { log.Fatal("Import returned error") } fmt.Printf("target: %v\n", handler.target) rf := db.NewRandomForestRegressor(100) oobScores := rf.Fit(handler.features, handler.target) auc := db.ROCArea(oobScores, targetBool) mse := db.MSE(oobScores, targetNumeric) variance := stats.Variance(targetNumeric) fmt.Printf("ROCArea: %v MSE: %v Var: %v\n", auc, mse, variance) }
func TestRandomForest(test *testing.T) { x := db.NewNumericFeature(nil) x.Add(0, 1, 2, 3, 4, 5, 6, 7) y := db.NewNumericFeature(nil) y.Add(1, 2, 1, 2, 1, 2, 1, 2) z := db.NewNumericFeature(nil) z.Add(1, 1, 1, 1, 2, 2, 2, 2) t := db.NewNumericFeature(nil) // t.Add(3, 0, 3, 1, 7, 6, 5, -1) t.Add(0, 0, 0, 0, 1, 1, 1, 1) rfFeatures := []db.OrderedFeature{} for _, f := range []*db.NumericFeature{x, y, z} { rfFeatures = append(rfFeatures, f) } rf := db.NewRandomForestRegressor(10) oob := rf.Fit(rfFeatures, t) tEstimate := rf.Predict([]db.Feature{x, y, z}) if len(tEstimate) != t.Len() { test.Errorf("rf.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len()) } fmt.Printf("oob preds: %v\n", oob) for i, te := range tEstimate { fmt.Printf("predicted: %v; actual: %v\n", te, t.Value(i)) // if te != t.Value(i) { // test.Errorf("Row %d: predicted %v; actual %v", i, te, t.Value///(i)) // } } fmt.Printf("feature importances (forest): %v\n", rf.Importances()) }