func TestLogisticRegression(t *testing.T) { Convey("Given labels, a classifier and data", t, func() { // Load data X, err := base.ParseCSVToInstances("train.csv", false) So(err, ShouldEqual, nil) Y, err := base.ParseCSVToInstances("test.csv", false) So(err, ShouldEqual, nil) // Setup the problem lr := NewLogisticRegression("l2", 1.0, 1e-6) lr.Fit(X) Convey("When predicting the label of first vector", func() { Z := lr.Predict(Y) Convey("The result should be 1", func() { So(Z.RowString(0), ShouldEqual, "1.00") }) }) Convey("When predicting the label of second vector", func() { Z := lr.Predict(Y) Convey("The result should be -1", func() { So(Z.RowString(1), ShouldEqual, "-1.00") }) }) }) }
func TestKnnClassifier(t *testing.T) { Convey("Given labels, a classifier and data", t, func() { trainingData, err := base.ParseCSVToInstances("knn_train.csv", false) So(err, ShouldBeNil) testingData, err := base.ParseCSVToInstances("knn_test.csv", false) So(err, ShouldBeNil) cls := NewKnnClassifier("euclidean", 2) cls.Fit(trainingData) predictions := cls.Predict(testingData) So(predictions, ShouldNotEqual, nil) Convey("When predicting the label for our first vector", func() { result := base.GetClass(predictions, 0) Convey("The result should be 'blue", func() { So(result, ShouldEqual, "blue") }) }) Convey("When predicting the label for our second vector", func() { result2 := base.GetClass(predictions, 1) Convey("The result should be 'red", func() { So(result2, ShouldEqual, "red") }) }) }) }
func TestKnnClassifier(t *testing.T) { Convey("Given labels, a classifier and data", t, func() { trainingData, err1 := base.ParseCSVToInstances("knn_train.csv", false) testingData, err2 := base.ParseCSVToInstances("knn_test.csv", false) if err1 != nil { t.Error(err1) return } if err2 != nil { t.Error(err2) return } cls := NewKnnClassifier("euclidean", 2) cls.Fit(trainingData) predictions := cls.Predict(testingData) Convey("When predicting the label for our first vector", func() { result := predictions.GetClass(0) Convey("The result should be 'blue", func() { So(result, ShouldEqual, "blue") }) }) Convey("When predicting the label for our first vector", func() { result2 := predictions.GetClass(1) Convey("The result should be 'red", func() { So(result2, ShouldEqual, "red") }) }) }) }
// TestLinearRegression exercises the linear regression error paths
// (predicting before fitting, fitting with too little data) and then
// checks that predictions on a real split are within 5% of the actuals.
func TestLinearRegression(t *testing.T) {
	Convey("Doing a linear regression", t, func() {
		lr := NewLinearRegression()

		Convey("With no training data", func() {
			Convey("Predicting", func() {
				testData, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
				So(err, ShouldBeNil)
				// Predict before Fit: must fail with the sentinel error
				_, err = lr.Predict(testData)
				Convey("Should result in a NoTrainingDataError", func() {
					So(err, ShouldEqual, NoTrainingDataError)
				})
			})
		})

		Convey("With not enough training data", func() {
			// exam.csv (singular) holds too few rows to fit a model
			trainingDatum, err := base.ParseCSVToInstances("../examples/datasets/exam.csv", true)
			So(err, ShouldBeNil)
			Convey("Fitting", func() {
				err = lr.Fit(trainingDatum)
				Convey("Should result in a NotEnoughDataError", func() {
					So(err, ShouldEqual, NotEnoughDataError)
				})
			})
		})

		Convey("With sufficient training data", func() {
			instances, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
			So(err, ShouldBeNil)
			trainData, testData := base.InstancesTrainTestSplit(instances, 0.1)
			Convey("Fitting and Predicting", func() {
				err := lr.Fit(trainData)
				So(err, ShouldBeNil)
				predictions, err := lr.Predict(testData)
				So(err, ShouldBeNil)
				Convey("It makes reasonable predictions", func() {
					_, rows := predictions.Size()
					for i := 0; i < rows; i++ {
						// Class values arrive as strings; parse errors ignored here
						actualValue, _ := strconv.ParseFloat(base.GetClass(testData, i), 64)
						expectedValue, _ := strconv.ParseFloat(base.GetClass(predictions, i), 64)
						// Each prediction must fall within 5% of the actual value
						So(actualValue, ShouldAlmostEqual, expectedValue, actualValue*0.05)
					}
				})
			})
		})
	})
}
func BenchmarkLinearRegressionOneRow(b *testing.B) { // Omits error handling in favor of brevity trainData, _ := base.ParseCSVToInstances("../examples/datasets/exams.csv", true) testData, _ := base.ParseCSVToInstances("../examples/datasets/exam.csv", true) lr := NewLinearRegression() lr.Fit(trainData) b.ResetTimer() for n := 0; n < b.N; n++ { lr.Predict(testData) } }
// TestBinning discretises the first iris attribute into 10 equal-width
// bins and compares every row against a pre-binned reference dataset.
func TestBinning(t *testing.T) {
	Convey("Given some data and a reference", t, func() {
		// Read the data and the binned reference
		inst1, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		if err != nil {
			panic(err)
		}
		inst2, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
		if err != nil {
			panic(err)
		}

		// Construct the binning filter over the first attribute only
		binAttr := inst1.AllAttributes()[0]
		filt := NewBinningFilter(inst1, 10)
		filt.AddAttribute(binAttr)
		filt.Train()
		inst1f := base.NewLazilyFilteredInstances(inst1, filt)

		// Retrieve the categorical version of the original Attribute
		// by name. NOTE(review): cAttr stays nil if no name matches,
		// which would make GetAttribute below fail — confirm intended.
		var cAttr base.Attribute
		for _, a := range inst1f.AllAttributes() {
			if a.GetName() == binAttr.GetName() {
				cAttr = a
			}
		}
		cAttrSpec, err := inst1f.GetAttribute(cAttr)
		So(err, ShouldEqual, nil)
		binAttrSpec, err := inst2.GetAttribute(binAttr)
		So(err, ShouldEqual, nil)

		// Compare the lazily filtered values with the reference,
		// row by row, via their string representations
		Convey("Discretized version should match reference", func() {
			_, rows := inst1.Size()
			for i := 0; i < rows; i++ {
				val1 := inst1f.Get(cAttrSpec, i)
				val2 := inst2.Get(binAttrSpec, i)
				val1s := cAttr.GetStringFromSysVal(val1)
				val2s := binAttr.GetStringFromSysVal(val2)
				So(val1s, ShouldEqual, val2s)
			}
		})
	})
}
// TestChiMergeDiscretization checks the interval boundaries computed by
// chiMerge on the small chim dataset and on the (sorted) iris dataset.
func TestChiMergeDiscretization(t *testing.T) {
	Convey("Chi-Merge Discretization", t, func() {
		chimDatasetPath := "../examples/datasets/chim.csv"
		Convey(fmt.Sprintf("With the '%s' dataset", chimDatasetPath), func() {
			instances, err := base.ParseCSVToInstances(chimDatasetPath, true)
			So(err, ShouldBeNil)

			_, rows := instances.Size()
			// Merge intervals of the first attribute at 90% significance
			frequencies := chiMerge(instances, instances.AllAttributes()[0], 0.9, 0, rows)

			// Collect the lower boundary value of each surviving interval
			values := []float64{}
			for _, entry := range frequencies {
				values = append(values, entry.Value)
			}
			Convey("Computes frequencies correctly", func() {
				So(values, ShouldResemble, []float64{1.3, 56.2, 87.1})
			})
		})

		irisHeadersDatasetpath := "../examples/datasets/iris_headers.csv"
		Convey(fmt.Sprintf("With the '%s' dataset", irisHeadersDatasetpath), func() {
			instances, err := base.ParseCSVToInstances(irisHeadersDatasetpath, true)
			So(err, ShouldBeNil)

			// chiMerge operates over a contiguous row range, so the rows
			// must first be ordered by the attribute being discretised
			Convey("Sorting the instances first", func() {
				allAttributes := instances.AllAttributes()
				sortedAttributesSpecs := base.ResolveAttributes(instances, allAttributes)[0:1]
				sortedInstances, err := base.Sort(instances, base.Ascending, sortedAttributesSpecs)
				So(err, ShouldBeNil)

				_, rows := sortedInstances.Size()
				frequencies := chiMerge(sortedInstances, sortedInstances.AllAttributes()[0], 0.9, 0, rows)
				values := []float64{}
				for _, entry := range frequencies {
					values = append(values, entry.Value)
				}
				Convey("Computes frequencies correctly", func() {
					So(values, ShouldResemble, []float64{4.3, 5.5, 5.8, 6.3, 7.1})
				})
			})
		})
	})
}
func TestDBSCANDistanceQuery(t *testing.T) { Convey("Should be able to determine which points are in range...", t, func() { // Read in the synthetic test data inst, err := base.ParseCSVToInstances("synthetic.csv", false) So(err, ShouldBeNil) // Create a neighbours vector neighbours := big.NewInt(0) // Compute pairwise distances dist, err := computePairwiseDistances(inst, inst.AllAttributes(), pairwise.NewEuclidean()) So(dist.At(0, 0), ShouldAlmostEqual, 0) So(dist.At(0, 1), ShouldAlmostEqual, 1) So(dist.At(1, 0), ShouldAlmostEqual, 1) So(dist.At(0, 2), ShouldAlmostEqual, math.Sqrt(5)) So(dist.At(2, 0), ShouldAlmostEqual, math.Sqrt(5)) So(err, ShouldBeNil) // Do the region query neighbours = regionQuery(0, neighbours, dist, 1) So(neighbours.Bit(0), ShouldEqual, 1) So(neighbours.Bit(1), ShouldEqual, 1) So(neighbours.Bit(2), ShouldEqual, 0) So(neighbours.Bit(3), ShouldEqual, 0) So(neighbours.Bit(4), ShouldEqual, 0) }) }
func TestDBSCANSynthetic(t *testing.T) { Convey("Synthetic DBSCAN test should work...", t, func() { inst, err := base.ParseCSVToInstances("synthetic.csv", false) So(err, ShouldBeNil) p := DBSCANParameters{ ClusterParameters{ inst.AllAttributes(), pairwise.NewEuclidean(), }, 1, 1, } m, err := DBSCAN(inst, p) So(err, ShouldBeNil) So(len(m), ShouldEqual, 2) So(m[1], ShouldContain, 0) So(m[1], ShouldContain, 1) So(m[1], ShouldContain, 2) So(m[1], ShouldContain, 3) }) }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } rand.Seed(time.Now().UnixNano()) insts := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() filt.Run(insts[1]) filt.Run(insts[0]) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(insts[0]) fmt.Println(rf) predictions := rf.Predict(insts[1]) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(insts[1], predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
// TestChiMergeFilter trains a Chi-Merge filter on the first two iris
// attributes and checks that the class attribute survives filtering.
func TestChiMergeFilter(t *testing.T) {
	Convey("Chi-Merge Filter", t, func() {
		// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
		// Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
		instances, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)
		Convey("Create and train the filter", func() {
			// 0.90 is the chi-squared significance level for merging
			filter := NewChiMergeFilter(instances, 0.90)
			filter.AddAttribute(instances.AllAttributes()[0])
			filter.AddAttribute(instances.AllAttributes()[1])
			filter.Train()
			Convey("Filter the dataset", func() {
				filteredInstances := base.NewLazilyFilteredInstances(instances, filter)
				classAttributes := filteredInstances.AllClassAttributes()
				Convey("There should only be one class attribute", func() {
					So(len(classAttributes), ShouldEqual, 1)
				})
				expectedClassAttribute := "Species"
				Convey(fmt.Sprintf("The class attribute should be %s", expectedClassAttribute), func() {
					So(classAttributes[0].GetName(), ShouldEqual, expectedClassAttribute)
				})
			})
		})
	})
}
func main() { // Load in a dataset, with headers. Header attributes will be stored. // Think of instances as a Data Frame structure in R or Pandas. // You can also create instances from scratch. rawData, err := base.ParseCSVToInstances("datasets/iris.csv", false) if err != nil { panic(err) } // Print a pleasant summary of your data. fmt.Println(rawData) //Initialises a new KNN classifier cls := knn.NewKnnClassifier("euclidean", 2) //Do a training-test split trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50) cls.Fit(trainData) //Calculates the Euclidean distance and returns the most popular label predictions := cls.Predict(testData) fmt.Println(predictions) // Prints precision/recall metrics confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions) if err != nil { panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error())) } fmt.Println(evaluation.GetSummary(confusionMat)) }
// main compares three tree-based classifiers (ID3, Random Tree, Random
// Forest) on a Chi-Merge-discretised 60-40 split of the iris dataset.
func main() {
	var tree base.Classifier
	rand.Seed(time.Now().UTC().UnixNano())

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge (0.99 significance)
	filt := filters.NewChiMergeFilter(iris, 0.99)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(iris)

	// Create a 60-40 training-test split
	insts := base.InstancesTrainTestSplit(iris, 0.60)

	//
	// First up, use ID3
	//
	tree = trees.NewID3DecisionTree(0.6)
	// (Parameter controls train-prune split.)

	// Train the ID3 tree
	tree.Fit(insts[0])

	// Generate predictions
	predictions := tree.Predict(insts[1])

	// Evaluate
	fmt.Println("ID3 Performance")
	cf := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Next up, Random Trees
	//

	// Consider two randomly-chosen attributes per split
	tree = trees.NewRandomTree(2)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomTree Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Finally, Random Forests: 100 trees, 3 attributes per split
	//
	tree = ensemble.NewRandomForest(100, 3)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomForest Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))
}
func BenchmarkBaggingRandomForestPredict(t *testing.B) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { t.Fatal("Unable to parse CSV to instances: %s", err.Error()) } rand.Seed(time.Now().UnixNano()) filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() instf := base.NewLazilyFilteredInstances(inst, filt) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(instf) t.ResetTimer() for i := 0; i < 20; i++ { rf.Predict(instf) } }
// main builds a Chi-Merge-discretised copy of the iris dataset and runs
// findBestSplit on the training portion of a 60-40 split.
func main() {
	var tree base.Classifier
	rand.Seed(44111342)

	// Load in the iris dataset.
	// NOTE(review): hard-coded absolute path — this only works on the
	// original author's machine; consider a relative dataset path.
	iris, err := base.ParseCSVToInstances("/home/kralli/go/src/github.com/sjwhitworth/golearn/examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge (0.999 significance)
	filt := filters.NewChiMergeFilter(iris, 0.999)
	for _, a := range base.NonClassFloatAttributes(iris) {
		filt.AddAttribute(a)
	}
	filt.Train()
	irisf := base.NewLazilyFilteredInstances(iris, filt)

	// Create a 60-40 training-test split.
	// NOTE(review): the split is taken from the UNfiltered dataset, so
	// findBestSplit sees undiscretised data; irisf is only printed below
	// — confirm whether the filtered data was meant to be used here.
	//testData
	trainData, _ := base.InstancesTrainTestSplit(iris, 0.60)

	findBestSplit(trainData)

	//fmt.Println(trainData)
	//fmt.Println(testData)
	// tree is never assigned, so this prints the nil interface value
	fmt.Println(tree)
	fmt.Println(irisf)
}
func TestPruning(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) filt.AddAllNumericAttributes() filt.Build() fmt.Println(testData) filt.Run(testData) filt.Run(trainData) root := NewRandomTree(2) fittrainData, fittestData := base.InstancesTrainTestSplit(trainData, 0.6) root.Fit(fittrainData) root.Prune(fittestData) fmt.Println(root) predictions := root.Predict(testData) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testData, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func TestBinaryFilterClassPreservation(t *testing.T) { Convey("Given a contrived dataset...", t, func() { // Read the contrived dataset inst, err := base.ParseCSVToInstances("./binary_test.csv", true) So(err, ShouldEqual, nil) // Add all Attributes to the filter bFilt := NewBinaryConvertFilter() bAttrs := inst.AllAttributes() for _, a := range bAttrs { bFilt.AddAttribute(a) } bFilt.Train() // Construct a LazilyFilteredInstances to handle it instF := base.NewLazilyFilteredInstances(inst, bFilt) Convey("All the expected class Attributes should be present if discretised...", func() { attrMap := make(map[string]bool) attrMap["arbitraryClass_hi"] = false attrMap["arbitraryClass_there"] = false attrMap["arbitraryClass_world"] = false for _, a := range instF.AllClassAttributes() { attrMap[a.GetName()] = true } So(attrMap["arbitraryClass_hi"], ShouldEqual, true) So(attrMap["arbitraryClass_there"], ShouldEqual, true) So(attrMap["arbitraryClass_world"], ShouldEqual, true) }) }) }
func CSVtoKNNData(filename string) base.FixedDataGrid { rawData, err := base.ParseCSVToInstances(filename, true) if err != nil { panic(err) } return rawData }
func main() { var tree base.Classifier // Load in the iris dataset iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true) if err != nil { panic(err) } for i := 1; i < 60; i += 2 { // Demonstrate the effect of adding more trees to the forest // and also how much better it is without discretisation. rand.Seed(44111342) tree = ensemble.NewRandomForest(i, 4) cfs, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(iris, tree, 5) if err != nil { panic(err) } mean, variance := evaluation.GetCrossValidatedMetric(cfs, evaluation.GetAccuracy) stdev := math.Sqrt(variance) fmt.Printf("%d\t%.2f\t(+/- %.2f)\n", i, mean, stdev*2) } }
func TestRandomForest1(testEnv *testing.T) { inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } rand.Seed(time.Now().UnixNano()) trainData, testData := base.InstancesTrainTestSplit(inst, 0.6) filt := filters.NewChiMergeFilter(inst, 0.90) for _, a := range base.NonClassFloatAttributes(inst) { filt.AddAttribute(a) } filt.Train() trainDataf := base.NewLazilyFilteredInstances(trainData, filt) testDataf := base.NewLazilyFilteredInstances(testData, filt) rf := new(BaggedModel) for i := 0; i < 10; i++ { rf.AddModel(trees.NewRandomTree(2)) } rf.Fit(trainDataf) fmt.Println(rf) predictions := rf.Predict(testDataf) fmt.Println(predictions) confusionMat := eval.GetConfusionMatrix(testDataf, predictions) fmt.Println(confusionMat) fmt.Println(eval.GetMacroPrecision(confusionMat)) fmt.Println(eval.GetMacroRecall(confusionMat)) fmt.Println(eval.GetSummary(confusionMat)) }
func TestChiMerge2(testEnv *testing.T) { // // See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf // Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992 inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } attrs := make([]int, 1) attrs[0] = 0 inst.Sort(base.Ascending, attrs) freq := chiMerge(inst, 0, 0.90, 0, inst.Rows) if len(freq) != 5 { testEnv.Error("Wrong length (%d)", len(freq)) testEnv.Error(freq) } if freq[0].Value != 4.3 { testEnv.Error(freq[0]) } if freq[1].Value != 5.5 { testEnv.Error(freq[1]) } if freq[2].Value != 5.8 { testEnv.Error(freq[2]) } if freq[3].Value != 6.3 { testEnv.Error(freq[3]) } if freq[4].Value != 7.1 { testEnv.Error(freq[4]) } }
func TestPredict(t *testing.T) { a := NewAveragePerceptron(10, 1.2, 0.5, 0.3) if a == nil { t.Errorf("Unable to create average perceptron") } absPath, _ := filepath.Abs("../examples/datasets/house-votes-84.csv") rawData, err := base.ParseCSVToInstances(absPath, true) if err != nil { t.Fail() } trainData, testData := base.InstancesTrainTestSplit(rawData, 0.5) a.Fit(trainData) if a.trained == false { t.Errorf("Perceptron was not trained") } predictions := a.Predict(testData) cf, err := evaluation.GetConfusionMatrix(testData, predictions) if err != nil { t.Errorf("Couldn't get confusion matrix: %s", err) t.Fail() } fmt.Println(evaluation.GetSummary(cf)) fmt.Println(trainData) fmt.Println(testData) if evaluation.GetAccuracy(cf) < 0.65 { t.Errorf("Perceptron not trained correctly") } }
func TestLinearRegression(t *testing.T) { lr := NewLinearRegression() rawData, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true) if err != nil { t.Fatal(err) } trainData, testData := base.InstancesTrainTestSplit(rawData, 0.1) err = lr.Fit(trainData) if err != nil { t.Fatal(err) } predictions, err := lr.Predict(testData) if err != nil { t.Fatal(err) } _, rows := predictions.Size() for i := 0; i < rows; i++ { fmt.Printf("Expected: %s || Predicted: %s\n", base.GetClass(testData, i), base.GetClass(predictions, i)) } }
// TestChiMergeFrequencyTable verifies frequency counting, class totals
// and the chi-squared statistic on the chim dataset's first attribute.
func TestChiMergeFrequencyTable(t *testing.T) {
	Convey("Chi-Merge Frequency Table", t, func() {
		instances, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
		So(err, ShouldBeNil)
		// One entry per distinct attribute value, holding per-class counts
		frequencyTable := ChiMBuildFrequencyTable(instances.AllAttributes()[0], instances)
		Convey("Computes frequencies correctly", func() {
			So(frequencyTable[0].Frequency["c1"], ShouldEqual, 1)
			So(frequencyTable[0].Frequency["c3"], ShouldEqual, 4)
			So(frequencyTable[10].Frequency["c2"], ShouldEqual, 1)
		})
		Convey("Counts classes correctly", func() {
			// Totals across the whole table, per class label
			classes := chiCountClasses(frequencyTable)
			So(classes["c1"], ShouldEqual, 27)
			So(classes["c2"], ShouldEqual, 12)
			So(classes["c3"], ShouldEqual, 21)
		})
		Convey("Computes statistics correctly", func() {
			// Chi-squared between adjacent intervals, tolerance 0.01
			So(chiComputeStatistic(frequencyTable[5], frequencyTable[6]), ShouldAlmostEqual, 1.89, 0.01)
			So(chiComputeStatistic(frequencyTable[1], frequencyTable[2]), ShouldAlmostEqual, 1.08, 0.01)
		})
	})
}
// main is a guided tour of the Instances API: loading, printing,
// precision control, in-place edits, and building instances from raw
// float data.
func main() {
	// Instances can be read using ParseCSVToInstances
	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Instances can be printed, and you'll see a human-readable summary
	// if you do so. The first section is a line like
	//     Instances with 150 row(s) and 5 attribute(s)
	//
	// It next prints all the attributes
	//     FloatAttribute(Sepal length)
	//     FloatAttribute(Sepal width)
	//     FloatAttribute(Petal length)
	//     FloatAttribute(Petal width)
	//     CategoricalAttribute([Iris-setosa Iris-versicolor Iris-virginica])
	// The final attribute has an asterisk (*) printed before it,
	// meaning that it is the class variable. It then prints out up to
	// 30 rows which correspond to those attributes.
	//     5.10 3.50 1.40 0.20 Iris-setosa
	//     4.90 3.00 1.40 0.20 Iris-setosa
	fmt.Println(rawData)

	// If two decimal places isn't enough, you can update the
	// Precision field on any FloatAttribute
	if attr, ok := rawData.GetAttr(0).(*base.FloatAttribute); !ok {
		panic("Invalid cast")
	} else {
		attr.Precision = 4
	}

	// Now the first column has more precision
	fmt.Println(rawData)

	// We can update the set of Instances, although the API
	// for doing so is not very sophisticated.
	rawData.SetAttrStr(0, 0, "1.00")
	rawData.SetAttrStr(0, rawData.ClassIndex, "Iris-unusual")
	fmt.Println(rawData)

	// There is a way of creating new Instances from scratch.
	// Inside an Instance, everything's stored as float64
	newData := make([]float64, 2)
	newData[0] = 1.0
	newData[1] = 0.0

	// Let's create some attributes
	attrs := make([]base.Attribute, 2)
	attrs[0] = base.NewFloatAttribute()
	attrs[0].SetName("Arbitrary Float Quantity")
	attrs[1] = new(base.CategoricalAttribute)
	attrs[1].SetName("Class")
	// Insert a standard class
	attrs[1].GetSysValFromString("A")

	// Now let's create the final instances set
	newInst := base.NewInstancesFromRaw(attrs, 1, newData)
	fmt.Println(newInst)
}
// TestDBSCAN compares our DBSCAN clustering of dbscan.csv against
// reference labels produced by scikit-learn (dbscan_labels.csv).
func TestDBSCAN(t *testing.T) {
	Convey("Loading some data and labels...", t, func() {
		inst, err := base.ParseCSVToInstances("dbscan.csv", false)
		So(err, ShouldBeNil)

		file, err := os.Open("dbscan_labels.csv")
		defer file.Close()
		So(err, ShouldBeNil)

		// Build the reference cluster map from the label file:
		// one integer cluster label per line, in row order.
		clusterMap := ClusterMap(make(map[int][]int))
		scanner := bufio.NewScanner(file)
		line := -1
		for scanner.Scan() {
			line = line + 1
			v, err := strconv.ParseInt(scanner.Text(), 10, 64)
			if err != nil {
				panic(err)
			}
			v = v + 1 // -1 are noise in scikit-learn's DBSCAN
			c := int(v)
			if c == 0 {
				// Noise points belong to no cluster; skip them
				continue
			}
			if _, ok := clusterMap[c]; !ok {
				clusterMap[c] = make([]int, 0)
			}
			clusterMap[c] = append(clusterMap[c], line)
		}

		Convey("Our DBSCAN implementation should match...", func() {
			// Parameters chosen to mirror the scikit-learn reference run.
			// NOTE(review): 0.3 / 10 assumed to be eps / MinPts from the
			// assertions below — confirm against the struct definition.
			p := DBSCANParameters{
				ClusterParameters{
					inst.AllAttributes(),
					pairwise.NewEuclidean(),
				},
				0.3,
				10,
			}
			m, err := DBSCAN(inst, p)
			Convey("There should be nothing in the result that's smaller than MinPts", func() {
				for id := range m {
					So(len(m[id]), ShouldBeGreaterThanOrEqualTo, 10)
				}
			})
			So(err, ShouldBeNil)
			eq, err := clusterMap.Equals(m)
			So(err, ShouldBeNil)
			So(eq, ShouldBeTrue)
		})
	})
}
func TestRandomTreeClassificationWithoutDiscretisation(t *testing.T) { Convey("Predictions on filtered data with a Random Tree", t, func() { instances, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) So(err, ShouldBeNil) trainData, testData := base.InstancesTrainTestSplit(instances, 0.6) verifyTreeClassification(trainData, testData) }) }
func BenchmarkFit(b *testing.B) { a := NewAveragePerceptron(10, 1.2, 0.5, 0.3) absPath, _ := filepath.Abs("../examples/datasets/house-votes-84.csv") rawData, _ := base.ParseCSVToInstances(absPath, true) trainData, _ := base.InstancesTrainTestSplit(rawData, 0.5) b.ResetTimer() for i := 0; i < b.N; i++ { a.Fit(trainData) } }
func TestBinning(testEnv *testing.T) { inst1, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) inst2, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true) inst3, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } filt := NewBinningFilter(inst1, 10) filt.AddAttribute(inst1.GetAttr(0)) filt.Build() filt.Run(inst1) for i := 0; i < inst1.Rows; i++ { val1 := inst1.Get(i, 0) val2 := inst2.Get(i, 0) val3 := inst3.Get(i, 0) if math.Abs(val1-val2) >= 1 { testEnv.Error(val1, val2, val3, i) } } }
func TestID3(testEnv *testing.T) { // Import the "PlayTennis" dataset inst, err := base.ParseCSVToInstances("../examples/datasets/tennis.csv", true) if err != nil { panic(err) } // Build the decision tree tree := NewID3DecisionTree(0.0) tree.Fit(inst) root := tree.Root // Verify the tree // First attribute should be "outlook" if root.SplitAttr.GetName() != "outlook" { testEnv.Error(root) } sunnyChild := root.Children["sunny"] overcastChild := root.Children["overcast"] rainyChild := root.Children["rainy"] if sunnyChild.SplitAttr.GetName() != "humidity" { testEnv.Error(sunnyChild) } if rainyChild.SplitAttr.GetName() != "windy" { testEnv.Error(rainyChild) } if overcastChild.SplitAttr != nil { testEnv.Error(overcastChild) } sunnyLeafHigh := sunnyChild.Children["high"] sunnyLeafNormal := sunnyChild.Children["normal"] if sunnyLeafHigh.Class != "no" { testEnv.Error(sunnyLeafHigh) } if sunnyLeafNormal.Class != "yes" { testEnv.Error(sunnyLeafNormal) } windyLeafFalse := rainyChild.Children["false"] windyLeafTrue := rainyChild.Children["true"] if windyLeafFalse.Class != "yes" { testEnv.Error(windyLeafFalse) } if windyLeafTrue.Class != "no" { testEnv.Error(windyLeafTrue) } if overcastChild.Class != "yes" { testEnv.Error(overcastChild) } }