func main() {
	var tree base.Classifier

	rand.Seed(time.Now().UTC().UnixNano())

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge
	filt := filters.NewChiMergeFilter(iris, 0.99)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(iris)

	// Create a 60-40 training-test split
	insts := base.InstancesTrainTestSplit(iris, 0.60)

	//
	// First up, use ID3
	//
	tree = trees.NewID3DecisionTree(0.6) // (Parameter controls train-prune split.)

	// Train the ID3 tree
	tree.Fit(insts[0])

	// Generate predictions
	predictions := tree.Predict(insts[1])

	// Evaluate
	fmt.Println("ID3 Performance")
	cf := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Next up, Random Trees
	//

	// Consider two randomly-chosen attributes
	tree = trees.NewRandomTree(2)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomTree Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Finally, Random Forests
	//
	tree = ensemble.NewRandomForest(100, 3)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomForest Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))
}
func main() {
	// Load in a dataset, with headers. Header attributes will be stored.
	// Think of instances as a Data Frame structure in R or Pandas.
	// You can also create instances from scratch.
	rawData, err := base.ParseCSVToInstances("datasets/iris.csv", false)
	if err != nil {
		panic(err)
	}

	// Print a pleasant summary of your data.
	fmt.Println(rawData)

	// Initialises a new KNN classifier
	cls := knn.NewKnnClassifier("euclidean", 2)

	// Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	cls.Fit(trainData)

	// Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(testData)
	fmt.Println(predictions)

	// Prints precision/recall metrics
	confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))
}
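The KNN example above is shown without its package header. A minimal sketch of the declarations it relies on, assuming the GoLearn import paths (github.com/sjwhitworth/golearn/...), would be:

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/evaluation"
	"github.com/sjwhitworth/golearn/knn"
)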
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	rand.Seed(time.Now().UnixNano())
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	for _, a := range base.NonClassFloatAttributes(inst) {
		filt.AddAttribute(a)
	}
	filt.Train()
	trainDataf := base.NewLazilyFilteredInstances(trainData, filt)
	testDataf := base.NewLazilyFilteredInstances(testData, filt)
	rf := new(BaggedModel)
	for i := 0; i < 10; i++ {
		rf.AddModel(trees.NewRandomTree(2))
	}
	rf.Fit(trainDataf)
	fmt.Println(rf)
	predictions := rf.Predict(testDataf)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testDataf, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	rand.Seed(time.Now().UnixNano())
	insts := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(insts[1])
	filt.Run(insts[0])
	rf := new(BaggedModel)
	for i := 0; i < 10; i++ {
		rf.AddModel(trees.NewRandomTree(2))
	}
	rf.Fit(insts[0])
	fmt.Println(rf)
	predictions := rf.Predict(insts[1])
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func TestPruning(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	fmt.Println(testData)
	filt.Run(testData)
	filt.Run(trainData)
	root := NewRandomTree(2)
	fittrainData, fittestData := base.InstancesTrainTestSplit(trainData, 0.6)
	root.Fit(fittrainData)
	root.Prune(fittestData)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func TestPredict(t *testing.T) {
	a := NewAveragePerceptron(10, 1.2, 0.5, 0.3)
	if a == nil {
		t.Errorf("Unable to create average perceptron")
	}
	absPath, _ := filepath.Abs("../examples/datasets/house-votes-84.csv")
	rawData, err := base.ParseCSVToInstances(absPath, true)
	if err != nil {
		t.Fail()
	}
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.5)
	a.Fit(trainData)
	if a.trained == false {
		t.Errorf("Perceptron was not trained")
	}
	predictions := a.Predict(testData)
	cf, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		t.Errorf("Couldn't get confusion matrix: %s", err)
		t.Fail()
	}
	fmt.Println(evaluation.GetSummary(cf))
	fmt.Println(trainData)
	fmt.Println(testData)
	if evaluation.GetAccuracy(cf) < 0.65 {
		t.Errorf("Perceptron not trained correctly")
	}
}
func BenchmarkKNNWithNoOpts(b *testing.B) {
	// Load
	train, test := readMnist()
	cls := NewKnnClassifier("euclidean", 1)
	cls.AllowOptimisations = false
	cls.Fit(train)
	predictions := cls.Predict(test)
	c, err := evaluation.GetConfusionMatrix(test, predictions)
	if err != nil {
		panic(err)
	}
	fmt.Println(evaluation.GetSummary(c))
	fmt.Println(evaluation.GetAccuracy(c))
}
func main() {
	data, err := base.ParseCSVToInstances("iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	cls := knn.NewKnnClassifier("euclidean", 2)
	trainData, testData := base.InstancesTrainTestSplit(data, 0.8)
	cls.Fit(trainData)
	predictions := cls.Predict(testData)
	fmt.Println(predictions)
	confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
func NewTestTrial(filename string, split float64) bool {
	cls := knn.NewKnnClassifier("euclidean", 2)
	data := CSVtoKNNData(filename)
	train, test := base.InstancesTrainTestSplit(data, split)
	cls.Fit(train)

	// Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(test)
	fmt.Println(predictions)

	confusionMat, err := evaluation.GetConfusionMatrix(test, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))
	return true
}
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.60)
	filt := filters.NewChiMergeFilter(trainData, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(testData)
	filt.Run(trainData)
	rf := NewRandomForest(10, 3)
	rf.Fit(trainData)
	predictions := rf.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetSummary(confusionMat))
}
func main() {
	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Initialises a new KNN classifier
	cls := knn.NewKnnClassifier("euclidean", 2)

	// Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	cls.Fit(trainData)

	// Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(testData)
	fmt.Println(predictions)

	// Prints precision/recall metrics
	confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
func main() {
	// Load and parse the data from CSV files
	fmt.Println("Loading data...")
	trainData, err := base.ParseCSVToInstances("data/mnist_train.csv", true)
	if err != nil {
		panic(err)
	}
	testData, err := base.ParseCSVToInstances("data/mnist_test.csv", true)
	if err != nil {
		panic(err)
	}

	// Create a new linear SVC with some good default values
	classifier, err := linear_models.NewLinearSVC("l1", "l2", true, 1.0, 1e-4)
	if err != nil {
		panic(err)
	}

	// Don't output information on each iteration
	base.Silent()

	// Train the linear SVC
	fmt.Println("Training...")
	classifier.Fit(trainData)

	// Make predictions for the test data
	fmt.Println("Predicting...")
	predictions, err := classifier.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Get a confusion matrix and print out some accuracy stats for our predictions
	confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))
}
func TestOneVsAllModel(t *testing.T) {
	classifierFunc := func(c string) base.Classifier {
		m, err := linear_models.NewLinearSVC("l1", "l2", true, 1.0, 1e-4)
		if err != nil {
			panic(err)
		}
		return m
	}

	Convey("Given data", t, func() {
		inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		X, Y := base.InstancesTrainTestSplit(inst, 0.4)
		m := NewOneVsAllModel(classifierFunc)
		m.Fit(X)

		Convey("The maximum class index should be 2", func() {
			So(m.maxClassVal, ShouldEqual, 2)
		})

		Convey("There should be three of everything...", func() {
			So(len(m.filters), ShouldEqual, 3)
			So(len(m.classifiers), ShouldEqual, 3)
		})

		Convey("Predictions should work...", func() {
			predictions, err := m.Predict(Y)
			So(err, ShouldEqual, nil)
			cf, err := evaluation.GetConfusionMatrix(Y, predictions)
			So(err, ShouldEqual, nil)
			fmt.Println(evaluation.GetAccuracy(cf))
			fmt.Println(evaluation.GetSummary(cf))
		})
	})
}
func TestRandomTreeClassification2(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	insts := base.InstancesTrainTestSplit(inst, 0.4)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	fmt.Println(insts[1])
	filt.Run(insts[1])
	filt.Run(insts[0])
	root := NewRandomTree(2)
	root.Fit(insts[0])
	fmt.Println(root)
	predictions := root.Predict(insts[1])
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	filt := filters.NewChiMergeFilter(inst, 0.90)
	for _, a := range base.NonClassFloatAttributes(inst) {
		filt.AddAttribute(a)
	}
	filt.Train()
	instf := base.NewLazilyFilteredInstances(inst, filt)
	trainData, testData := base.InstancesTrainTestSplit(instf, 0.60)
	rf := NewRandomForest(10, 3)
	rf.Fit(trainData)
	predictions := rf.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetSummary(confusionMat))
}
func TestID3Classification(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	filt := filters.NewBinningFilter(inst, 10)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(inst)
	fmt.Println(inst)
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.70)

	// Build the decision tree
	rule := new(InformationGainRuleGenerator)
	root := InferID3Tree(trainData, rule)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func main() {
	rand.Seed(4402201)
	rawData, err := base.ParseCSVToInstances("../datasets/house-votes-84.csv", true)
	if err != nil {
		panic(err)
	}

	// Initialises a new AveragePerceptron classifier
	cls := perceptron.NewAveragePerceptron(10, 1.2, 0.5, 0.3)

	// Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	fmt.Println(trainData)
	fmt.Println(testData)
	cls.Fit(trainData)
	predictions := cls.Predict(testData)

	// Prints precision/recall metrics
	confusionMat, _ := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
func TestRandomTreeClassification(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(trainData)
	filt.Run(testData)
	fmt.Println(inst)
	r := new(RandomTreeRuleGenerator)
	r.Attributes = 2
	root := InferID3Tree(trainData, r)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
func main() {
	var tree base.Classifier
	rand.Seed(44111342)

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge
	filt := filters.NewChiMergeFilter(iris, 0.999)
	for _, a := range base.NonClassFloatAttributes(iris) {
		filt.AddAttribute(a)
	}
	filt.Train()
	irisf := base.NewLazilyFilteredInstances(iris, filt)

	// Create a 60-40 training-test split
	trainData, testData := base.InstancesTrainTestSplit(irisf, 0.60)

	//
	// First up, use ID3
	//
	tree = trees.NewID3DecisionTree(0.6) // (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err := tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (information gain)")
	cf, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.InformationGainRatioRuleGenerator)) // (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (information gain ratio)")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.GiniCoefficientRuleGenerator)) // (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (gini index generator)")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	//
	// Next up, Random Trees
	//

	// Consider two randomly-chosen attributes
	tree = trees.NewRandomTree(2)
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}
	fmt.Println("RandomTree Performance")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	//
	// Finally, Random Forests
	//
	tree = ensemble.NewRandomForest(70, 3)
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}
	fmt.Println("RandomForest Performance")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))
}
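Like the KNN example, the decision-tree walkthrough above is shown without its package header. A minimal sketch of the imports it would need, again assuming the GoLearn import paths (github.com/sjwhitworth/golearn/...), would be:

package main

import (
	"fmt"
	"math/rand"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/ensemble"
	"github.com/sjwhitworth/golearn/evaluation"
	"github.com/sjwhitworth/golearn/filters"
	"github.com/sjwhitworth/golearn/trees"
)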