func main() { var tree base.Classifier rand.Seed(44111342) // Load in the iris dataset iris, err := base.ParseCSVToInstances("/home/kralli/go/src/github.com/sjwhitworth/golearn/examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } // Discretise the iris dataset with Chi-Merge filt := filters.NewChiMergeFilter(iris, 0.999) for _, a := range base.NonClassFloatAttributes(iris) { filt.AddAttribute(a) } filt.Train() irisf := base.NewLazilyFilteredInstances(iris, filt) // Create a 60-40 training-test split //testData trainData, _ := base.InstancesTrainTestSplit(iris, 0.60) findBestSplit(trainData) //fmt.Println(trainData) //fmt.Println(testData) fmt.Println(tree) fmt.Println(irisf) }
func BenchmarkBaggingRandomForestPredict(t *testing.B) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { t.Fatal("Unable to parse CSV to instances: %s", err.Error()) } rand.Seed(time.Now().UnixNano()) filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() instf := base.NewLazilyFilteredInstances(inst, filt) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(instf) t.ResetTimer() for i := 0; i < 20; i++ { rf.Predict(instf) } }
func TestPruning(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() fmt.Println(testData) filt.Run(testData) filt.Run(trainData) root := NewRandomTree(2) fittrainData, fittestData := base.InstancesTrainTestSplit(trainData, 0.6) root.Fit(fittrainData) root.Prune(fittestData) fmt.Println(root) predictions := root.Predict(testData) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testData, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func main() { var tree base.Classifier rand.Seed(time.Now().UTC().UnixNano()) // Load in the iris dataset iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true) if err != nil { panic(err) } // Discretise the iris dataset with Chi-Merge filt := filters.NewChiMergeFilter(iris, 0.99) filt.AddAllNumericAttributes() filt.Build() filt.Run(iris) // Create a 60-40 training-test split insts := base.InstancesTrainTestSplit(iris, 0.60) // // First up, use ID3 // tree = trees.NewID3DecisionTree(0.6) // (Parameter controls train-prune split.) // Train the ID3 tree tree.Fit(insts[0]) // Generate predictions predictions := tree.Predict(insts[1]) // Evaluate fmt.Println("ID3 Performance") cf := eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(eval.GetSummary(cf)) // // Next up, Random Trees // // Consider two randomly-chosen attributes tree = trees.NewRandomTree(2) tree.Fit(insts[0]) predictions = tree.Predict(insts[1]) fmt.Println("RandomTree Performance") cf = eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(eval.GetSummary(cf)) // // Finally, Random Forests // tree = ensemble.NewRandomForest(100, 3) tree.Fit(insts[0]) predictions = tree.Predict(insts[1]) fmt.Println("RandomForest Performance") cf = eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(eval.GetSummary(cf)) }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } rand.Seed(time.Now().UnixNano()) trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() trainDataf := base.NewLazilyFilteredInstances(trainData, filt) testDataf := base.NewLazilyFilteredInstances(testData, filt) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(trainDataf) fmt.Println(rf) predictions := rf.Predict(testDataf) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testDataf, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } rand.Seed(time.Now().UnixNano()) insts := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(insts[1]) filt.Run(insts[0]) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(insts[0]) fmt.Println(rf) predictions := rf.Predict(insts[1]) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func TestRandomTree(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(inst) fmt.Println(inst) r := new(RandomTreeRuleGenerator) r.Attributes = 2 root := InferID3Tree(inst, r) fmt.Println(root) }
func TestRandomTreeClassificationAfterDiscretisation(t *testing.T) { Convey("Predictions on filtered data with a Random Tree", t, func() { instances, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) So(err, ShouldBeNil) trainData, testData := base.InstancesTrainTestSplit(instances, 0.6) filter := filters.NewChiMergeFilter(instances, 0.9) for _, a := range base.NonClassFloatAttributes(instances) { filter.AddAttribute(a) } filter.Train() filteredTrainData := base.NewLazilyFilteredInstances(trainData, filter) filteredTestData := base.NewLazilyFilteredInstances(testData, filter) verifyTreeClassification(filteredTrainData, filteredTestData) }) }
func TestRandomForest(t *testing.T) { Convey("Given a valid CSV file", t, func() { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) So(err, ShouldBeNil) Convey("When Chi-Merge filtering the data", func() { filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() instf := base.NewLazilyFilteredInstances(inst, filt) Convey("Splitting the data into test and training sets", func() { trainData, testData := base.InstancesTrainTestSplit(instf, 0.60) Convey("Fitting and predicting with a Random Forest", func() { rf := NewRandomForest(10, 3) err = rf.Fit(trainData) So(err, ShouldBeNil) predictions, err := rf.Predict(testData) So(err, ShouldBeNil) confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions) So(err, ShouldBeNil) Convey("Predictions should be somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMat), ShouldBeGreaterThan, 0.35) }) }) }) }) Convey("Fitting with a Random Forest with too many features compared to the data", func() { rf := NewRandomForest(10, len(base.NonClassAttributes(inst))+1) err = rf.Fit(inst) Convey("Should return an error", func() { So(err, ShouldNotBeNil) }) }) }) }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } trainData, testData := base.InstancesTrainTestSplit(inst, 0.60) filt := filters.NewChiMergeFilter(trainData, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(testData) filt.Run(trainData) rf := NewRandomForest(10, 3) rf.Fit(trainData) predictions := rf.Predict(testData) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testData, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetSummary(confusionMat)) }
func BenchmarkBaggingRandomForestFit(testEnv *testing.B) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } rand.Seed(time.Now().UnixNano()) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(inst) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } testEnv.ResetTimer() for i := 0; i < 20; i++ { rf.Fit(inst) } }
func TestBaggedModelRandomForest(t *testing.T) { Convey("Given data", t, func() { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) So(err, ShouldBeNil) Convey("Splitting the data into training and test data", func() { trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) Convey("Filtering the split datasets", func() { rand.Seed(time.Now().UnixNano()) filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() trainDataf := base.NewLazilyFilteredInstances(trainData, filt) testDataf := base.NewLazilyFilteredInstances(testData, filt) Convey("Fitting and Predicting with a Bagged Model of 10 Random Trees", func() { rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(trainDataf) predictions := rf.Predict(testDataf) confusionMat, err := evaluation.GetConfusionMatrix(testDataf, predictions) So(err, ShouldBeNil) Convey("Predictions are somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMat), ShouldBeGreaterThan, 0.5) }) }) }) }) }) }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() instf := base.NewLazilyFilteredInstances(inst, filt) trainData, testData := base.InstancesTrainTestSplit(instf, 0.60) rf := NewRandomForest(10, 3) rf.Fit(trainData) predictions := rf.Predict(testData) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testData, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetSummary(confusionMat)) }
func TestRandomTreeClassification2(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } insts := base.InstancesTrainTestSplit(inst, 0.4) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() fmt.Println(insts[1]) filt.Run(insts[1]) filt.Run(insts[0]) root := NewRandomTree(2) root.Fit(insts[0]) fmt.Println(root) predictions := root.Predict(insts[1]) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func TestRandomTreeClassification(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(trainData) filt.Run(testData) fmt.Println(inst) r := new(RandomTreeRuleGenerator) r.Attributes = 2 root := InferID3Tree(trainData, r) fmt.Println(root) predictions := root.Predict(testData) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testData, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func main() { var tree base.Classifier rand.Seed(44111342) // Load in the iris dataset iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true) if err != nil { panic(err) } // Discretise the iris dataset with Chi-Merge filt := filters.NewChiMergeFilter(iris, 0.999) for _, a := range base.NonClassFloatAttributes(iris) { filt.AddAttribute(a) } filt.Train() irisf := base.NewLazilyFilteredInstances(iris, filt) // Create a 60-40 training-test split trainData, testData := base.InstancesTrainTestSplit(irisf, 0.60) // // First up, use ID3 // tree = trees.NewID3DecisionTree(0.6) // (Parameter controls train-prune split.) // Train the ID3 tree err = tree.Fit(trainData) if err != nil { panic(err) } // Generate predictions predictions, err := tree.Predict(testData) if err != nil { panic(err) } // Evaluate fmt.Println("ID3 Performance (information gain)") cf, err := evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(cf)) tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.InformationGainRatioRuleGenerator)) // (Parameter controls train-prune split.) // Train the ID3 tree err = tree.Fit(trainData) if err != nil { panic(err) } // Generate predictions predictions, err = tree.Predict(testData) if err != nil { panic(err) } // Evaluate fmt.Println("ID3 Performance (information gain ratio)") cf, err = evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(cf)) tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.GiniCoefficientRuleGenerator)) // (Parameter controls train-prune split.) // Train the ID3 tree err = tree.Fit(trainData) if err != nil { panic(err) } // Generate predictions predictions, err = tree.Predict(testData) if err != nil { panic(err) } // Evaluate fmt.Println("ID3 Performance (gini index generator)") cf, err = evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(cf)) // // Next up, Random Trees // // Consider two randomly-chosen attributes tree = trees.NewRandomTree(2) err = tree.Fit(testData) if err != nil { panic(err) } predictions, err = tree.Predict(testData) if err != nil { panic(err) } fmt.Println("RandomTree Performance") cf, err = evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(cf)) // // Finally, Random Forests // tree = ensemble.NewRandomForest(70, 3) err = tree.Fit(trainData) if err != nil { panic(err) } predictions, err = tree.Predict(testData) if err != nil { panic(err) } fmt.Println("RandomForest Performance") cf, err = evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(cf)) }
func TestRandomTreeClassification(t *testing.T) { Convey("Predictions on filtered data with a Random Tree", t, func() { instances, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) So(err, ShouldBeNil) trainData, testData := base.InstancesTrainTestSplit(instances, 0.6) filter := filters.NewChiMergeFilter(instances, 0.9) for _, a := range base.NonClassFloatAttributes(instances) { filter.AddAttribute(a) } filter.Train() filteredTrainData := base.NewLazilyFilteredInstances(trainData, filter) filteredTestData := base.NewLazilyFilteredInstances(testData, filter) Convey("Using InferID3Tree to create the tree and do the fitting", func() { Convey("Using a RandomTreeRule", func() { randomTreeRuleGenerator := new(RandomTreeRuleGenerator) randomTreeRuleGenerator.Attributes = 2 root := InferID3Tree(filteredTrainData, randomTreeRuleGenerator) Convey("Predicting with the tree", func() { predictions, err := root.Predict(filteredTestData) So(err, ShouldBeNil) confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions) So(err, ShouldBeNil) Convey("Predictions should be somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5) }) }) }) Convey("Using a InformationGainRule", func() { informationGainRuleGenerator := new(InformationGainRuleGenerator) root := InferID3Tree(filteredTrainData, informationGainRuleGenerator) Convey("Predicting with the tree", func() { predictions, err := root.Predict(filteredTestData) So(err, ShouldBeNil) confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions) So(err, ShouldBeNil) Convey("Predictions should be somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5) }) }) }) }) Convey("Using NewRandomTree to create the tree", func() { root := NewRandomTree(2) Convey("Fitting with the tree", func() { err = root.Fit(filteredTrainData) So(err, ShouldBeNil) Convey("Predicting with the tree, *without* pruning first", func() { predictions, err := root.Predict(filteredTestData) So(err, ShouldBeNil) confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions) So(err, ShouldBeNil) Convey("Predictions should be somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5) }) }) Convey("Predicting with the tree, pruning first", func() { root.Prune(filteredTestData) predictions, err := root.Predict(filteredTestData) So(err, ShouldBeNil) confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions) So(err, ShouldBeNil) Convey("Predictions should be somewhat accurate", func() { So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.4) }) }) }) }) }) }