Beispiel #1
0
// main is a CSV smoke run (originally labelled "TestCSV"). It imports
// pima-indians-diabetes.csv through db.Import into a Handler, fits a random
// forest regressor on the imported features and target, and prints the ROC
// area, the MSE and the target variance computed from the scores returned by
// Fit. Any failure to open or import the file terminates the program.
func main() {
	file, err := os.Open("pima-indians-diabetes.csv")
	if err != nil {
		log.Fatalf("%v", err)
	}
	defer file.Close()

	// Hand Import the same io.Reader view of the file that was passed before.
	var reader io.Reader = file

	handler := &Handler{}
	// Check the import result before touching handler.target: after a failed
	// import the handler's contents cannot be trusted.
	if err = db.Import(reader, handler); err != nil {
		log.Fatalf("Import returned error: %v", err)
	}

	// Build two views of the target column: the raw numeric values (for MSE
	// and variance) and a boolean "value == 1.0" label (for the ROC area).
	n := handler.target.Len()
	targetBool := make([]bool, n)
	targetNumeric := make([]float64, n)
	for i := 0; i < n; i++ {
		v := handler.target.NumericValue(i)
		targetBool[i] = v == 1.0
		targetNumeric[i] = v
	}

	fmt.Printf("target: %v\n", handler.target)

	// NOTE(review): the argument 100 is presumably the number of trees —
	// confirm against db.NewRandomForestRegressor.
	rf := db.NewRandomForestRegressor(100)

	oobScores := rf.Fit(handler.features, handler.target)

	auc := db.ROCArea(oobScores, targetBool)
	mse := db.MSE(oobScores, targetNumeric)
	variance := stats.Variance(targetNumeric)

	fmt.Printf("ROCArea: %v MSE: %v Var: %v\n", auc, mse, variance)
}
// TestRandomForest fits a small random forest regressor on three numeric
// features (x, y, z) of eight rows each against a target t, then predicts on
// the same features. It fails if the number of predictions differs from the
// number of target rows; the predictions, the scores returned by Fit and the
// forest's feature importances are printed for inspection only.
func TestRandomForest(test *testing.T) {
	x := db.NewNumericFeature(nil)
	x.Add(0, 1, 2, 3, 4, 5, 6, 7)

	y := db.NewNumericFeature(nil)
	y.Add(1, 2, 1, 2, 1, 2, 1, 2)

	z := db.NewNumericFeature(nil)
	z.Add(1, 1, 1, 1, 2, 2, 2, 2)

	// The target is z shifted down by one, so it is fully determined by z.
	// An alternative, noisier target is kept here for experimentation:
	//	t.Add(3, 0, 3, 1, 7, 6, 5, -1)
	t := db.NewNumericFeature(nil)
	t.Add(0, 0, 0, 0, 1, 1, 1, 1)

	rfFeatures := []db.OrderedFeature{x, y, z}

	rf := db.NewRandomForestRegressor(10)
	oob := rf.Fit(rfFeatures, t)

	tEstimate := rf.Predict([]db.Feature{x, y, z})

	// Stop immediately on a length mismatch: the loop below indexes t by the
	// prediction index and must not run past the end of the target.
	if len(tEstimate) != t.Len() {
		test.Fatalf("rf.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len())
	}

	fmt.Printf("oob preds: %v\n", oob)

	// NOTE(review): a per-row "predicted == actual" assertion was commented
	// out in the original; presumably exact equality is not expected from the
	// forest — confirm before re-enabling it.
	for i, te := range tEstimate {
		fmt.Printf("predicted: %v; actual: %v\n", te, t.Value(i))
	}

	fmt.Printf("feature importances (forest): %v\n", rf.Importances())
}