// TestDecisionTree fits a DecisionTreeRegressor on three numeric features and
// checks that predicting on the same rows reproduces the training target
// exactly.
func TestDecisionTree(test *testing.T) {
	x := db.NewNumericFeature(nil)
	x.Add(0, 1, 2, 3, 4, 5, 6, 7)

	y := db.NewNumericFeature(nil)
	y.Add(1, 2, 1, 2, 1, 2, 1, 2)

	z := db.NewNumericFeature(nil)
	z.Add(1, 1, 1, 1, 2, 2, 2, 2)

	// t is the regression target; the *testing.T is named test so that t
	// stays free for it.
	t := db.NewNumericFeature(nil)
	t.Add(3, 0, 3, 1, 7, 6, 5, -1)

	dtFeatures := []db.OrderedFeature{x, y, z}

	dt := db.NewDecisionTreeRegressor()
	dt.Fit(dtFeatures, t)

	tEstimate := dt.Predict([]db.Feature{x, y, z})

	// Stop here on a length mismatch: the per-row comparison below indexes t
	// by the position in tEstimate and would go out of range if Predict
	// returned more rows than the target holds.
	if len(tEstimate) != t.Len() {
		test.Fatalf("dt.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len())
	}

	for i, te := range tEstimate {
		if want := t.Value(i); te != want {
			test.Errorf("Row %d: predicted %v; actual %v", i, te, want)
		}
	}

	// Importances are printed for manual inspection only; nothing is asserted
	// about them.
	fmt.Printf("%v\n", dt.Importances())
}
Beispiel #2
0
// Header stores the given column names on the handler and allocates one empty
// numeric feature per name.
func (handler *Handler) Header(header []string) {
	handler.header = header

	columns := make([]*db.NumericFeature, 0, len(header))
	for range header {
		columns = append(columns, db.NewNumericFeature(nil))
	}
	handler.columns = columns
}
Beispiel #3
0
// featureFactory builds the data-frame feature for the column named s.
//
// A name whose first byte is 'C' gets a categorical feature backed by a new
// string table; every other name, including the empty string, gets a numeric
// feature.
func featureFactory(s string) db.DataFrameFeature {
	var feature db.Feature
	// Check the length first: indexing s[0] on an empty name would panic.
	if len(s) > 0 && s[0] == 'C' {
		feature = db.NewCategoricalFeature(db.NewStringTable())
	} else {
		feature = db.NewNumericFeature(nil)
	}
	return db.NewDataFrameFeature(s, feature)
}
Beispiel #4
0
// TestNumericFeature exercises NumericFeature: adding values from strings,
// floats and integers, reading them back through NumericValue and Value,
// the ordering exposed by InOrder after Prepare, and Len.
func TestNumericFeature(t *testing.T) {
	// Assign to Feature to ensure NumericFeature implements Feature
	var f db.Feature = db.NewNumericFeature(nil)
	// Assign it back so we can use its NumericFeature methods
	nf := f.(*db.NumericFeature)

	// The third string is not a number; the test expects it to be stored as
	// NaN (see check(2, ...) below).
	for _, s := range []string{"3.0", "2.0", "non-numeric"} {
		nf.AddFromString(s)
	}

	for _, v := range []float64{1.0, 4.0} {
		nf.Add(v)
	}

	// Make sure integers work
	nf.Add(5)

	// sameValue reports whether a and b are the same number, treating two
	// NaNs as equal (NaN never compares equal with ==).
	sameValue := func(a, b float64) bool {
		return a == b || (math.IsNaN(a) && math.IsNaN(b))
	}

	check := func(index int, expected float64) {
		actual := nf.NumericValue(index)
		altActual := nf.Value(index).(float64)
		if !sameValue(actual, altActual) {
			t.Errorf("NumericValue(%d) is %g; Value(%d) is %v", index, actual, index, altActual)
		}
		if !sameValue(actual, expected) {
			t.Errorf("NumericValue(%d) got %g; expecting %g", index, actual, expected)
		}
	}

	check(0, 3.0)
	check(1, 2.0)
	check(2, math.NaN())
	check(3, 1.0)
	check(4, 4.0)
	check(5, 5.0)

	ordercheck := func(index int, expected int) {
		orderedIndex := nf.InOrder(index)
		if orderedIndex != expected {
			t.Errorf("InOrder(%d) got %d; expecting %d", index, orderedIndex, expected)
		}
	}

	nf.Prepare()

	// The expectations below correspond to the row indices in ascending value
	// order (1.0, 2.0, 3.0, 4.0, 5.0) with the NaN row last.
	ordercheck(0, 3)
	ordercheck(1, 1)
	ordercheck(2, 0)
	ordercheck(3, 4)
	ordercheck(4, 5)
	ordercheck(5, 2)

	// Three values from strings, two floats and one integer were added.
	const wantLen = 6
	if nf.Len() != wantLen {
		t.Errorf("Len() returned %d; expecting %d", nf.Len(), wantLen)
	}
}
// TestRandomForest fits a RandomForestRegressor on three numeric features and
// checks that Predict returns one estimate per training row. The predictions,
// the value returned by Fit and the feature importances are only printed; no
// assertion is made about their values.
func TestRandomForest(test *testing.T) {
	x := db.NewNumericFeature(nil)
	x.Add(0, 1, 2, 3, 4, 5, 6, 7)

	y := db.NewNumericFeature(nil)
	y.Add(1, 2, 1, 2, 1, 2, 1, 2)

	z := db.NewNumericFeature(nil)
	z.Add(1, 1, 1, 1, 2, 2, 2, 2)

	// t is the regression target; the *testing.T is named test so that t
	// stays free for it.
	t := db.NewNumericFeature(nil)
	// Alternative target, currently disabled:
	//	t.Add(3, 0, 3, 1, 7, 6, 5, -1)
	t.Add(0, 0, 0, 0, 1, 1, 1, 1)

	rfFeatures := []db.OrderedFeature{x, y, z}

	// NOTE(review): 10 is presumably the number of trees; confirm against
	// NewRandomForestRegressor.
	rf := db.NewRandomForestRegressor(10)
	oob := rf.Fit(rfFeatures, t)

	tEstimate := rf.Predict([]db.Feature{x, y, z})

	// Printed before the length check so the diagnostic is still available
	// when the check aborts the test.
	fmt.Printf("oob preds: %v\n", oob)

	// Stop here on a length mismatch: the loop below indexes t by the
	// position in tEstimate and would go out of range if Predict returned
	// more rows than the target holds.
	if len(tEstimate) != t.Len() {
		test.Fatalf("rf.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len())
	}

	// Exact equality between prediction and target is not asserted here; each
	// pair is printed for manual inspection.
	for i, te := range tEstimate {
		fmt.Printf("predicted: %v; actual: %v\n", te, t.Value(i))
	}

	fmt.Printf("feature importances (forest): %v\n", rf.Importances())
}
Beispiel #6
0
// TestDataFrame builds a data frame with four numeric columns, adds one row
// of numeric strings and one row of floats, and verifies the frame's
// dimensions, cell values and column names.
func TestDataFrame(t *testing.T) {
	names := []string{"a", "b", "c", "d"}

	// The first row is given as strings, the second as floats; both are
	// expected to read back as the float64 values in expected.
	data := [][]interface{}{
		{"1.0", "2.0", "3.0", "4.0"},
		{1.1, 2.2, 3.3, 4.4}}

	expected := [][]float64{
		{1.0, 2.0, 3.0, 4.0},
		{1.1, 2.2, 3.3, 4.4}}

	nf := db.NewDataFrame()
	for _, name := range names {
		nf.AddFeature(db.NewDataFrameFeature(name, db.NewNumericFeature(nil)))
	}

	if nf.Length() != 0 {
		t.Error("Length should be 0")
	}

	for _, v := range data {
		nf.AddRow(v)
	}

	// The dimension checks are fatal: the loops below index expected and
	// names by the frame's dimensions and would go out of range if the frame
	// were larger than the fixtures.
	if nf.Length() != len(expected) {
		t.Fatalf("Length() returned %d; expected %d", nf.Length(), len(expected))
	}

	if nf.Width() != len(names) {
		t.Fatalf("Width() returned %d; expected %d", nf.Width(), len(names))
	}

	for i := 0; i < nf.Length(); i++ {
		for j := 0; j < nf.Width(); j++ {
			if got := nf.Get(i, j); got != expected[i][j] {
				t.Errorf("Cell (%d, %d): got %v; expected %v", i, j, got, expected[i][j])
			}
		}
	}

	for j := 0; j < nf.Width(); j++ {
		if got := nf.ColumnName(j); got != names[j] {
			// The unexpected name is also echoed to stdout.
			fmt.Println(got)
			t.Errorf(`Column name: got "%v"; expected "%v"`, got, names[j])
		}
	}
}