func TestDecisionTree(test *testing.T) { x := db.NewNumericFeature(nil) x.Add(0, 1, 2, 3, 4, 5, 6, 7) y := db.NewNumericFeature(nil) y.Add(1, 2, 1, 2, 1, 2, 1, 2) z := db.NewNumericFeature(nil) z.Add(1, 1, 1, 1, 2, 2, 2, 2) t := db.NewNumericFeature(nil) t.Add(3, 0, 3, 1, 7, 6, 5, -1) dtFeatures := []db.OrderedFeature{} for _, f := range []*db.NumericFeature{x, y, z} { dtFeatures = append(dtFeatures, f) } dt := db.NewDecisionTreeRegressor() dt.Fit(dtFeatures, t) tEstimate := dt.Predict([]db.Feature{x, y, z}) if len(tEstimate) != t.Len() { test.Errorf("dt.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len()) } for i, te := range tEstimate { if te != t.Value(i) { test.Errorf("Row %d: predicted %v; actual %v", i, te, t.Value(i)) } } fmt.Printf("%v\n", dt.Importances()) }
func (handler *Handler) Header(header []string) { handler.header = header handler.columns = make([]*db.NumericFeature, len(header)) for i := range handler.columns { handler.columns[i] = db.NewNumericFeature(nil) } }
func featureFactory(s string) db.DataFrameFeature { var feature db.Feature switch s[0] { case 'C': feature = db.NewCategoricalFeature(db.NewStringTable()) default: feature = db.NewNumericFeature(nil) } return db.NewDataFrameFeature(s, feature) }
func TestNumericFeature(t *testing.T) { // Assign to Feature to ensure NumericFeature implements Feature var f db.Feature = db.NewNumericFeature(nil) // Assign it back so we can use its NumericFeature methods var nf = f.(*db.NumericFeature) for _, s := range []string{"3.0", "2.0", "non-numeric"} { nf.AddFromString(s) } for _, v := range []float64{1.0, 4.0} { nf.Add(v) } // Make sure integers work nf.Add(5) check := func(index int, expected float64) { actual := nf.NumericValue(index) altActual := nf.Value(index).(float64) if actual != altActual && math.IsNaN(actual) != math.IsNaN(altActual) { t.Errorf("NumericValue(%d) is %g; Value(%d) is %v", index, actual, index, altActual) } if actual != expected && math.IsNaN(actual) != math.IsNaN(expected) { t.Errorf("Get(%d) got %g; expecting %g", index, actual, expected) } } check(0, 3.0) check(1, 2.0) check(2, math.NaN()) check(3, 1.0) check(4, 4.0) check(5, 5.0) ordercheck := func(index int, expected int) { orderedIndex := nf.InOrder(index) if orderedIndex != expected { t.Errorf("Get(%d) got %d; expecting %d", index, orderedIndex, expected) } } nf.Prepare() ordercheck(0, 3) ordercheck(1, 1) ordercheck(2, 0) ordercheck(3, 4) ordercheck(4, 5) ordercheck(5, 2) if nf.Len() != 6 { t.Errorf("Len() returned %d; expecting %d", nf.Len(), 5) } }
func TestRandomForest(test *testing.T) { x := db.NewNumericFeature(nil) x.Add(0, 1, 2, 3, 4, 5, 6, 7) y := db.NewNumericFeature(nil) y.Add(1, 2, 1, 2, 1, 2, 1, 2) z := db.NewNumericFeature(nil) z.Add(1, 1, 1, 1, 2, 2, 2, 2) t := db.NewNumericFeature(nil) // t.Add(3, 0, 3, 1, 7, 6, 5, -1) t.Add(0, 0, 0, 0, 1, 1, 1, 1) rfFeatures := []db.OrderedFeature{} for _, f := range []*db.NumericFeature{x, y, z} { rfFeatures = append(rfFeatures, f) } rf := db.NewRandomForestRegressor(10) oob := rf.Fit(rfFeatures, t) tEstimate := rf.Predict([]db.Feature{x, y, z}) if len(tEstimate) != t.Len() { test.Errorf("rf.Predict() returned result of length: %d; expected %d", len(tEstimate), t.Len()) } fmt.Printf("oob preds: %v\n", oob) for i, te := range tEstimate { fmt.Printf("predicted: %v; actual: %v\n", te, t.Value(i)) // if te != t.Value(i) { // test.Errorf("Row %d: predicted %v; actual %v", i, te, t.Value///(i)) // } } fmt.Printf("feature importances (forest): %v\n", rf.Importances()) }
func TestDataFrame(t *testing.T) { names := []string{"a", "b", "c", "d"} data := [][]interface{}{ {"1.0", "2.0", "3.0", "4.0"}, {1.1, 2.2, 3.3, 4.4}} expected := [][]float64{ {1.0, 2.0, 3.0, 4.0}, {1.1, 2.2, 3.3, 4.4}} nf := db.NewDataFrame() for i := 0; i < 4; i++ { nf.AddFeature(db.NewDataFrameFeature(names[i], db.NewNumericFeature(nil))) } if nf.Length() != 0 { t.Error("Length should be 0") } for _, v := range data { nf.AddRow(v) } if nf.Length() != 2 { t.Error("Length should be 2") } if nf.Width() != 4 { t.Error("Columns() should return 4") } for i := 0; i < nf.Length(); i++ { for j := 0; j < nf.Width(); j++ { if nf.Get(i, j) != expected[i][j] { t.Error(fmt.Sprintf("Cell (%d, %d): got %v; expected %v", i, j, nf.Get(i, j), expected[i][j])) } } } for j := 0; j < nf.Width(); j++ { if nf.ColumnName(j) != names[j] { fmt.Println(nf.ColumnName(j)) t.Error(fmt.Sprintf(`Column name: got "%v"; expected "%v"`, nf.ColumnName(j), names[j])) } } }