Example #1
0
// TestLogisticRegression fits a logistic regression on train.csv and checks
// the labels predicted for the first two rows of test.csv.
func TestLogisticRegression(t *testing.T) {
	Convey("Given labels, a classifier and data", t, func() {
		// Both fixtures are parsed with the second argument set to false.
		trainSet, trainErr := base.ParseCSVToInstances("train.csv", false)
		So(trainErr, ShouldEqual, nil)
		testSet, testErr := base.ParseCSVToInstances("test.csv", false)
		So(testErr, ShouldEqual, nil)

		// Build the model and fit it on the training set.
		model := NewLogisticRegression("l2", 1.0, 1e-6)
		model.Fit(trainSet)

		Convey("When predicting the label of first vector", func() {
			predicted := model.Predict(testSet)
			Convey("The result should be 1", func() {
				So(predicted.RowString(0), ShouldEqual, "1.00")
			})
		})
		Convey("When predicting the label of second vector", func() {
			predicted := model.Predict(testSet)
			Convey("The result should be -1", func() {
				So(predicted.RowString(1), ShouldEqual, "-1.00")
			})
		})
	})
}
Example #2
0
// TestKnnClassifier fits a KnnClassifier built with ("euclidean", 2) on
// knn_train.csv and checks the classes predicted for the first two rows of
// knn_test.csv.
func TestKnnClassifier(t *testing.T) {
	Convey("Given labels, a classifier and data", t, func() {
		train, trainErr := base.ParseCSVToInstances("knn_train.csv", false)
		So(trainErr, ShouldBeNil)

		test, testErr := base.ParseCSVToInstances("knn_test.csv", false)
		So(testErr, ShouldBeNil)

		classifier := NewKnnClassifier("euclidean", 2)
		classifier.Fit(train)
		predicted := classifier.Predict(test)
		So(predicted, ShouldNotEqual, nil)

		Convey("When predicting the label for our first vector", func() {
			first := base.GetClass(predicted, 0)
			Convey("The result should be 'blue", func() {
				So(first, ShouldEqual, "blue")
			})
		})

		Convey("When predicting the label for our second vector", func() {
			second := base.GetClass(predicted, 1)
			Convey("The result should be 'red", func() {
				So(second, ShouldEqual, "red")
			})
		})
	})
}
Example #3
0
// TestKnnClassifier fits a KnnClassifier built with ("euclidean", 2) on
// knn_train.csv and checks the classes predicted for rows 0 and 1 of
// knn_test.csv.
func TestKnnClassifier(t *testing.T) {
	Convey("Given labels, a classifier and data", t, func() {

		trainingData, err1 := base.ParseCSVToInstances("knn_train.csv", false)
		testingData, err2 := base.ParseCSVToInstances("knn_test.csv", false)

		// Abort the scenario if either fixture could not be parsed.
		if err1 != nil {
			t.Error(err1)
			return
		}
		if err2 != nil {
			t.Error(err2)
			return
		}

		cls := NewKnnClassifier("euclidean", 2)
		cls.Fit(trainingData)
		predictions := cls.Predict(testingData)

		Convey("When predicting the label for our first vector", func() {
			result := predictions.GetClass(0)
			Convey("The result should be 'blue", func() {
				So(result, ShouldEqual, "blue")
			})
		})

		// This scenario inspects index 1, so it is described as the second
		// vector (the description previously duplicated the one above).
		Convey("When predicting the label for our second vector", func() {
			result2 := predictions.GetClass(1)
			Convey("The result should be 'red", func() {
				So(result2, ShouldEqual, "red")
			})
		})
	})
}
Example #4
0
// TestLinearRegression exercises a LinearRegression in three situations:
// predicting before any fit (NoTrainingDataError expected), fitting on
// exam.csv (NotEnoughDataError expected), and fitting then predicting on a
// train/test split of exams.csv.
func TestLinearRegression(t *testing.T) {
	Convey("Doing a  linear regression", t, func() {
		lr := NewLinearRegression()

		Convey("With no training data", func() {
			Convey("Predicting", func() {
				testData, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
				So(err, ShouldBeNil)

				_, err = lr.Predict(testData)

				Convey("Should result in a NoTrainingDataError", func() {
					So(err, ShouldEqual, NoTrainingDataError)
				})

			})
		})

		Convey("With not enough training data", func() {
			trainingDatum, err := base.ParseCSVToInstances("../examples/datasets/exam.csv", true)
			So(err, ShouldBeNil)

			Convey("Fitting", func() {
				err = lr.Fit(trainingDatum)

				Convey("Should result in a NotEnoughDataError", func() {
					So(err, ShouldEqual, NotEnoughDataError)
				})
			})
		})

		Convey("With sufficient training data", func() {
			instances, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
			So(err, ShouldBeNil)
			trainData, testData := base.InstancesTrainTestSplit(instances, 0.1)

			Convey("Fitting and Predicting", func() {
				err := lr.Fit(trainData)
				So(err, ShouldBeNil)

				predictions, err := lr.Predict(testData)
				So(err, ShouldBeNil)

				Convey("It makes reasonable predictions", func() {
					_, rows := predictions.Size()

					for i := 0; i < rows; i++ {
						// The observed value comes from the held-out data and the
						// predicted value from the model. A class string that does
						// not parse now fails the test instead of comparing as 0.
						observed, err := strconv.ParseFloat(base.GetClass(testData, i), 64)
						So(err, ShouldBeNil)
						predicted, err := strconv.ParseFloat(base.GetClass(predictions, i), 64)
						So(err, ShouldBeNil)

						// Tolerance is 5% of the observed value.
						So(predicted, ShouldAlmostEqual, observed, observed*0.05)
					}
				})
			})
		})
	})
}
Example #5
0
// BenchmarkLinearRegressionOneRow times lr.Predict on the instances parsed
// from exam.csv after fitting on exams.csv. Setup failures abort the
// benchmark so that a broken model is never timed.
func BenchmarkLinearRegressionOneRow(b *testing.B) {
	trainData, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
	if err != nil {
		b.Fatal(err)
	}
	testData, err := base.ParseCSVToInstances("../examples/datasets/exam.csv", true)
	if err != nil {
		b.Fatal(err)
	}
	lr := NewLinearRegression()
	if err := lr.Fit(trainData); err != nil {
		b.Fatal(err)
	}

	// Exclude parsing and fitting from the measurement.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err := lr.Predict(testData); err != nil {
			b.Fatal(err)
		}
	}
}
Example #6
0
// TestBinning trains a BinningFilter (constructed with 10) on the first
// attribute of iris_headers.csv and compares each filtered row against the
// same attribute in iris_binned.csv.
func TestBinning(t *testing.T) {
	Convey("Given some data and a reference", t, func() {
		// Data to be binned.
		source, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		if err != nil {
			panic(err)
		}

		// Reference data to compare against.
		reference, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
		if err != nil {
			panic(err)
		}

		// Train the binning filter on the first attribute only.
		target := source.AllAttributes()[0]
		binner := NewBinningFilter(source, 10)
		binner.AddAttribute(target)
		binner.Train()
		filtered := base.NewLazilyFilteredInstances(source, binner)

		// Pick the filtered attribute whose name matches the original; if
		// several match, the last one is kept.
		var binned base.Attribute
		for _, candidate := range filtered.AllAttributes() {
			if candidate.GetName() != target.GetName() {
				continue
			}
			binned = candidate
		}

		binnedSpec, err := filtered.GetAttribute(binned)
		So(err, ShouldEqual, nil)
		referenceSpec, err := reference.GetAttribute(target)
		So(err, ShouldEqual, nil)

		// Compare the string form of every row in both data sets.
		Convey("Discretized version should match reference", func() {
			_, rows := source.Size()
			for row := 0; row < rows; row++ {
				gotRaw := filtered.Get(binnedSpec, row)
				wantRaw := reference.Get(referenceSpec, row)
				got := binned.GetStringFromSysVal(gotRaw)
				want := target.GetStringFromSysVal(wantRaw)
				So(got, ShouldEqual, want)
			}
		})
	})
}
Example #7
0
// TestChiMergeDiscretization runs chiMerge over the first attribute of
// chim.csv and of a sorted copy of iris_headers.csv, and compares the Value
// of each returned entry with a fixed list.
func TestChiMergeDiscretization(t *testing.T) {
	Convey("Chi-Merge Discretization", t, func() {
		chimPath := "../examples/datasets/chim.csv"

		Convey(fmt.Sprintf("With the '%s' dataset", chimPath), func() {
			data, err := base.ParseCSVToInstances(chimPath, true)
			So(err, ShouldBeNil)

			_, rowCount := data.Size()

			entries := chiMerge(data, data.AllAttributes()[0], 0.9, 0, rowCount)
			// Kept non-nil so the comparison below sees an empty slice, not nil.
			got := make([]float64, 0, len(entries))
			for _, e := range entries {
				got = append(got, e.Value)
			}

			Convey("Computes frequencies correctly", func() {
				So(got, ShouldResemble, []float64{1.3, 56.2, 87.1})
			})
		})

		irisPath := "../examples/datasets/iris_headers.csv"

		Convey(fmt.Sprintf("With the '%s' dataset", irisPath), func() {
			data, err := base.ParseCSVToInstances(irisPath, true)
			So(err, ShouldBeNil)

			Convey("Sorting the instances first", func() {
				// Sort ascending on the first resolved attribute only.
				sortSpecs := base.ResolveAttributes(data, data.AllAttributes())[0:1]
				sorted, err := base.Sort(data, base.Ascending, sortSpecs)
				So(err, ShouldBeNil)

				_, rowCount := sorted.Size()

				entries := chiMerge(sorted, sorted.AllAttributes()[0], 0.9, 0, rowCount)
				got := make([]float64, 0, len(entries))
				for _, e := range entries {
					got = append(got, e.Value)
				}

				Convey("Computes frequencies correctly", func() {
					So(got, ShouldResemble, []float64{4.3, 5.5, 5.8, 6.3, 7.1})
				})
			})
		})
	})
}
Example #8
0
// TestDBSCANDistanceQuery checks selected pairwise distances computed for
// synthetic.csv and the neighbour bits set by regionQuery for point 0 with
// the last argument set to 1.
func TestDBSCANDistanceQuery(t *testing.T) {

	Convey("Should be able to determine which points are in range...", t, func() {

		// Read in the synthetic test data
		inst, err := base.ParseCSVToInstances("synthetic.csv", false)
		So(err, ShouldBeNil)

		// Create a neighbours vector
		neighbours := big.NewInt(0)

		// Compute pairwise distances. The error is checked before dist is
		// touched so that a failure is reported as such rather than through
		// whatever dist holds on error.
		dist, err := computePairwiseDistances(inst, inst.AllAttributes(), pairwise.NewEuclidean())
		So(err, ShouldBeNil)
		So(dist.At(0, 0), ShouldAlmostEqual, 0)
		So(dist.At(0, 1), ShouldAlmostEqual, 1)
		So(dist.At(1, 0), ShouldAlmostEqual, 1)
		So(dist.At(0, 2), ShouldAlmostEqual, math.Sqrt(5))
		So(dist.At(2, 0), ShouldAlmostEqual, math.Sqrt(5))

		// Do the region query: bits 0 and 1 should be set, bits 2-4 clear.
		neighbours = regionQuery(0, neighbours, dist, 1)
		So(neighbours.Bit(0), ShouldEqual, 1)
		So(neighbours.Bit(1), ShouldEqual, 1)
		So(neighbours.Bit(2), ShouldEqual, 0)
		So(neighbours.Bit(3), ShouldEqual, 0)
		So(neighbours.Bit(4), ShouldEqual, 0)

	})

}
Example #9
0
// TestDBSCANSynthetic runs DBSCAN over synthetic.csv and expects a result of
// length 2 in which entry 1 contains the indices 0 through 3.
func TestDBSCANSynthetic(t *testing.T) {
	Convey("Synthetic DBSCAN test should work...", t, func() {

		data, err := base.ParseCSVToInstances("synthetic.csv", false)
		So(err, ShouldBeNil)

		// Positional literal: cluster parameters first, then two values of 1.
		params := DBSCANParameters{
			ClusterParameters{
				data.AllAttributes(),
				pairwise.NewEuclidean(),
			},
			1,
			1,
		}

		clusters, err := DBSCAN(data, params)
		So(err, ShouldBeNil)

		So(len(clusters), ShouldEqual, 2)
		for _, member := range []int{0, 1, 2, 3} {
			So(clusters[1], ShouldContain, member)
		}

	})
}
Example #10
0
// TestRandomForest1 is a smoke test: it bags ten random trees over a
// Chi-Merge filtered split of iris_headers.csv and prints the model, the
// predictions and the evaluation metrics. Nothing is asserted.
func TestRandomForest1(testEnv *testing.T) {
	iris, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	rand.Seed(time.Now().UnixNano())
	split := base.InstancesTrainTestSplit(iris, 0.6)

	// Build the filter on the full data set, then run it over both halves
	// (index 1 first, then index 0).
	chiFilter := filters.NewChiMergeFilter(iris, 0.90)
	chiFilter.AddAllNumericAttributes()
	chiFilter.Build()
	chiFilter.Run(split[1])
	chiFilter.Run(split[0])

	forest := new(BaggedModel)
	for added := 0; added < 10; added++ {
		forest.AddModel(trees.NewRandomTree(2))
	}
	forest.Fit(split[0])
	fmt.Println(forest)

	predicted := forest.Predict(split[1])
	fmt.Println(predicted)

	matrix := eval.GetConfusionMatrix(split[1], predicted)
	fmt.Println(matrix)
	fmt.Println(eval.GetMacroPrecision(matrix))
	fmt.Println(eval.GetMacroRecall(matrix))
	fmt.Println(eval.GetSummary(matrix))
}
Example #11
0
// TestChiMergeFilter trains a ChiMergeFilter on the first two attributes of
// iris_headers.csv and checks that the filtered view exposes exactly one
// class attribute, named "Species".
func TestChiMergeFilter(t *testing.T) {
	Convey("Chi-Merge Filter", t, func() {
		// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
		//   Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
		iris, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		Convey("Create and train the filter", func() {
			chiFilter := NewChiMergeFilter(iris, 0.90)
			chiFilter.AddAttribute(iris.AllAttributes()[0])
			chiFilter.AddAttribute(iris.AllAttributes()[1])
			chiFilter.Train()

			Convey("Filter the dataset", func() {
				filtered := base.NewLazilyFilteredInstances(iris, chiFilter)
				classAttrs := filtered.AllClassAttributes()

				Convey("There should only be one class attribute", func() {
					So(len(classAttrs), ShouldEqual, 1)
				})

				wantName := "Species"

				Convey(fmt.Sprintf("The class attribute should be %s", wantName), func() {
					So(classAttrs[0].GetName(), ShouldEqual, wantName)
				})
			})
		})
	})
}
Example #12
0
// main loads datasets/iris.csv, fits a KNN classifier on half of it and
// prints the predictions for the other half followed by an evaluation
// summary.
func main() {
	// Parse the CSV into instances. The second argument is false here;
	// presumably it controls header handling — TODO confirm.
	dataset, err := base.ParseCSVToInstances("datasets/iris.csv", false)
	if err != nil {
		panic(err)
	}

	// Print the parsed data.
	fmt.Println(dataset)

	// Build the classifier with ("euclidean", 2).
	classifier := knn.NewKnnClassifier("euclidean", 2)

	// Split with a 0.50 ratio and fit on the training part.
	train, test := base.InstancesTrainTestSplit(dataset, 0.50)
	classifier.Fit(train)

	// Predict the held-out part and print the result.
	predicted := classifier.Predict(test)
	fmt.Println(predicted)

	// Build the confusion matrix and print its summary.
	matrix, err := evaluation.GetConfusionMatrix(test, predicted)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(matrix))
}
Example #13
0
// main discretises the iris data set with Chi-Merge, splits it, and prints
// an evaluation summary for an ID3 tree, a random tree and a random forest
// in turn.
func main() {
	rand.Seed(time.Now().UTC().UnixNano())

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise every numeric attribute with Chi-Merge.
	discretiser := filters.NewChiMergeFilter(iris, 0.99)
	discretiser.AddAllNumericAttributes()
	discretiser.Build()
	discretiser.Run(iris)

	// Split with a 0.60 ratio; index 0 is used for fitting, index 1 for
	// prediction and evaluation.
	insts := base.InstancesTrainTestSplit(iris, 0.60)

	// report fits model, predicts, then prints heading and the summary.
	report := func(heading string, model base.Classifier) {
		model.Fit(insts[0])
		predictions := model.Predict(insts[1])
		fmt.Println(heading)
		cf := eval.GetConfusionMatrix(insts[1], predictions)
		fmt.Println(eval.GetSummary(cf))
	}

	// ID3; the original comment says the parameter controls the
	// train-prune split.
	report("ID3 Performance", trees.NewID3DecisionTree(0.6))

	// Random tree considering two randomly-chosen attributes.
	report("RandomTree Performance", trees.NewRandomTree(2))

	// Random forest constructed with (100, 3).
	report("RandomForest Performance", ensemble.NewRandomForest(100, 3))
}
Example #14
0
// BenchmarkBaggingRandomForestPredict times Predict on a bagged model of ten
// random trees fitted to a Chi-Merge filtered view of iris_headers.csv.
func BenchmarkBaggingRandomForestPredict(t *testing.B) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		// Fatalf, not Fatal: the message carries a format verb.
		t.Fatalf("Unable to parse CSV to instances: %s", err.Error())
	}

	rand.Seed(time.Now().UnixNano())
	filt := filters.NewChiMergeFilter(inst, 0.90)
	for _, a := range base.NonClassFloatAttributes(inst) {
		filt.AddAttribute(a)
	}
	filt.Train()
	instf := base.NewLazilyFilteredInstances(inst, filt)

	rf := new(BaggedModel)
	for i := 0; i < 10; i++ {
		rf.AddModel(trees.NewRandomTree(2))
	}

	rf.Fit(instf)

	// Exclude setup from the measurement and let the testing framework pick
	// the iteration count; a fixed count makes the reported ns/op meaningless.
	t.ResetTimer()
	for i := 0; i < t.N; i++ {
		rf.Predict(instf)
	}
}
Example #15
0
// main loads the iris data set, trains a Chi-Merge filter on its non-class
// float attributes, calls findBestSplit on a training split and prints the
// (never assigned) classifier and the filtered view.
func main() {
	// Declared but never assigned, so the print below shows its zero value.
	var tree base.Classifier

	rand.Seed(44111342)

	// NOTE(review): absolute, machine-specific path — confirm before reuse.
	iris, err := base.ParseCSVToInstances("/home/kralli/go/src/github.com/sjwhitworth/golearn/examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Train Chi-Merge on every non-class float attribute.
	discretiser := filters.NewChiMergeFilter(iris, 0.999)
	for _, attr := range base.NonClassFloatAttributes(iris) {
		discretiser.AddAttribute(attr)
	}
	discretiser.Train()
	discretised := base.NewLazilyFilteredInstances(iris, discretiser)

	// Only the training part of the 0.60 split is used.
	// NOTE(review): the split is taken from the raw data, not the filtered
	// view — confirm this is intended.
	training, _ := base.InstancesTrainTestSplit(iris, 0.60)

	findBestSplit(training)

	fmt.Println(tree)
	fmt.Println(discretised)
}
Example #16
0
// TestPruning is a smoke test: it fits and prunes a random tree on a
// Chi-Merge filtered split of iris_headers.csv and prints the tree, the
// predictions and the evaluation metrics. Nothing is asserted.
func TestPruning(testEnv *testing.T) {
	iris, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	train, test := base.InstancesTrainTestSplit(iris, 0.6)

	// Build the filter on the full data set, then run it over the test part
	// followed by the training part.
	chiFilter := filters.NewChiMergeFilter(iris, 0.90)
	chiFilter.AddAllNumericAttributes()
	chiFilter.Build()
	fmt.Println(test)
	chiFilter.Run(test)
	chiFilter.Run(train)

	// Split the training part again: one part to fit, one to prune.
	tree := NewRandomTree(2)
	fitPart, prunePart := base.InstancesTrainTestSplit(train, 0.6)
	tree.Fit(fitPart)
	tree.Prune(prunePart)
	fmt.Println(tree)

	predicted := tree.Predict(test)
	fmt.Println(predicted)

	matrix := eval.GetConfusionMatrix(test, predicted)
	fmt.Println(matrix)
	fmt.Println(eval.GetMacroPrecision(matrix))
	fmt.Println(eval.GetMacroRecall(matrix))
	fmt.Println(eval.GetSummary(matrix))
}
Example #17
0
// TestBinaryFilterClassPreservation adds every attribute of binary_test.csv
// to a BinaryConvertFilter and checks that three expected class attribute
// names appear in the filtered view.
func TestBinaryFilterClassPreservation(t *testing.T) {
	Convey("Given a contrived dataset...", t, func() {
		data, err := base.ParseCSVToInstances("./binary_test.csv", true)
		So(err, ShouldEqual, nil)

		// Register all attributes with the filter and train it.
		converter := NewBinaryConvertFilter()
		for _, attr := range data.AllAttributes() {
			converter.AddAttribute(attr)
		}
		converter.Train()

		filtered := base.NewLazilyFilteredInstances(data, converter)

		Convey("All the expected class Attributes should be present if discretised...", func() {
			wanted := []string{
				"arbitraryClass_hi",
				"arbitraryClass_there",
				"arbitraryClass_world",
			}

			// Start every expected name as unseen.
			seen := make(map[string]bool)
			for _, name := range wanted {
				seen[name] = false
			}

			// Mark each class attribute present in the filtered view.
			for _, attr := range filtered.AllClassAttributes() {
				seen[attr.GetName()] = true
			}

			for _, name := range wanted {
				So(seen[name], ShouldEqual, true)
			}
		})
	})
}
Example #18
0
// CSVtoKNNData parses the CSV file at filename (passing true as the second
// ParseCSVToInstances argument) and returns the parsed instances. It panics
// if parsing fails.
func CSVtoKNNData(filename string) base.FixedDataGrid {
	instances, parseErr := base.ParseCSVToInstances(filename, true)
	if parseErr == nil {
		return instances
	}
	panic(parseErr)
}
Example #19
0
File: rf.go Project: CTLife/golearn
// main cross-validates random forests of 1, 3, ..., 59 trees on the iris
// data set and prints, per size, the mean accuracy and twice its standard
// deviation.
func main() {
	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// The original comments describe this loop as demonstrating the effect
	// of adding more trees to the forest, without discretisation.
	for forestSize := 1; forestSize < 60; forestSize += 2 {
		// Re-seed with the same constant on every iteration.
		rand.Seed(44111342)

		var forest base.Classifier = ensemble.NewRandomForest(forestSize, 4)
		matrices, err := evaluation.GenerateCrossFoldValidationConfusionMatrices(iris, forest, 5)
		if err != nil {
			panic(err)
		}

		mean, variance := evaluation.GetCrossValidatedMetric(matrices, evaluation.GetAccuracy)
		spread := math.Sqrt(variance) * 2

		fmt.Printf("%d\t%.2f\t(+/- %.2f)\n", forestSize, mean, spread)
	}
}
Example #20
0
// TestRandomForest1 is a smoke test: it bags ten random trees over lazily
// Chi-Merge filtered views of an iris_headers.csv split and prints the
// model, the predictions and the evaluation metrics. Nothing is asserted.
func TestRandomForest1(testEnv *testing.T) {
	iris, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	rand.Seed(time.Now().UnixNano())
	train, test := base.InstancesTrainTestSplit(iris, 0.6)

	// Train the filter on the non-class float attributes of the full set.
	chiFilter := filters.NewChiMergeFilter(iris, 0.90)
	for _, attr := range base.NonClassFloatAttributes(iris) {
		chiFilter.AddAttribute(attr)
	}
	chiFilter.Train()

	// Wrap both halves of the split in filtered views.
	trainView := base.NewLazilyFilteredInstances(train, chiFilter)
	testView := base.NewLazilyFilteredInstances(test, chiFilter)

	forest := new(BaggedModel)
	for added := 0; added < 10; added++ {
		forest.AddModel(trees.NewRandomTree(2))
	}
	forest.Fit(trainView)
	fmt.Println(forest)

	predicted := forest.Predict(testView)
	fmt.Println(predicted)

	matrix := eval.GetConfusionMatrix(testView, predicted)
	fmt.Println(matrix)
	fmt.Println(eval.GetMacroPrecision(matrix))
	fmt.Println(eval.GetMacroRecall(matrix))
	fmt.Println(eval.GetSummary(matrix))
}
Example #21
0
// TestChiMerge2 sorts iris_headers.csv ascending on attribute 0, runs
// chiMerge over it and checks that five entries with the expected values
// come back.
//
// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
//   Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
func TestChiMerge2(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	attrs := []int{0}
	inst.Sort(base.Ascending, attrs)
	freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)

	// Stop on a length mismatch: the value checks below index freq[0..4]
	// and would otherwise panic on a shorter result. Fatalf (rather than
	// Error) is needed for the format verbs to be expanded.
	want := []float64{4.3, 5.5, 5.8, 6.3, 7.1}
	if len(freq) != len(want) {
		testEnv.Fatalf("Wrong length (%d): %v", len(freq), freq)
	}
	for i, value := range want {
		if freq[i].Value != value {
			testEnv.Error(freq[i])
		}
	}
}
Example #22
0
// TestPredict fits an average perceptron on half of house-votes-84.csv,
// predicts the other half and requires an accuracy of at least 0.65.
// Setup failures stop the test immediately, because every later step
// depends on the values they would leave unset.
func TestPredict(t *testing.T) {
	a := NewAveragePerceptron(10, 1.2, 0.5, 0.3)
	if a == nil {
		t.Fatalf("Unable to create average perceptron")
	}

	absPath, err := filepath.Abs("../examples/datasets/house-votes-84.csv")
	if err != nil {
		t.Fatalf("Unable to resolve dataset path: %s", err)
	}
	rawData, err := base.ParseCSVToInstances(absPath, true)
	if err != nil {
		t.Fatalf("Unable to parse CSV to instances: %s", err)
	}

	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.5)
	a.Fit(trainData)

	if !a.trained {
		t.Errorf("Perceptron was not trained")
	}

	predictions := a.Predict(testData)
	cf, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		// The matrix is unusable past this point, so stop here.
		t.Fatalf("Couldn't get confusion matrix: %s", err)
	}
	fmt.Println(evaluation.GetSummary(cf))
	fmt.Println(trainData)
	fmt.Println(testData)
	if evaluation.GetAccuracy(cf) < 0.65 {
		t.Errorf("Perceptron not trained correctly")
	}
}
// TestLinearRegression fits a linear regression on a 0.1 split of exams.csv
// and prints the class of each test row next to the predicted one. Only
// parse, fit and predict errors fail the test.
func TestLinearRegression(t *testing.T) {
	model := NewLinearRegression()

	exams, err := base.ParseCSVToInstances("../examples/datasets/exams.csv", true)
	if err != nil {
		t.Fatal(err)
	}

	train, test := base.InstancesTrainTestSplit(exams, 0.1)
	if fitErr := model.Fit(train); fitErr != nil {
		t.Fatal(fitErr)
	}

	predicted, err := model.Predict(test)
	if err != nil {
		t.Fatal(err)
	}

	_, rowCount := predicted.Size()

	for row := 0; row < rowCount; row++ {
		expectedClass := base.GetClass(test, row)
		predictedClass := base.GetClass(predicted, row)
		fmt.Printf("Expected: %s || Predicted: %s\n", expectedClass, predictedClass)
	}
}
Example #24
0
// TestChiMergeFrequencyTable builds a frequency table for the first
// attribute of chim.csv and checks selected frequencies, the per-class
// totals and two chi statistics.
func TestChiMergeFrequencyTable(t *testing.T) {
	Convey("Chi-Merge Frequency Table", t, func() {
		data, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
		So(err, ShouldBeNil)

		firstAttr := data.AllAttributes()[0]
		table := ChiMBuildFrequencyTable(firstAttr, data)

		Convey("Computes frequencies correctly", func() {
			So(table[0].Frequency["c1"], ShouldEqual, 1)
			So(table[0].Frequency["c3"], ShouldEqual, 4)
			So(table[10].Frequency["c2"], ShouldEqual, 1)
		})

		Convey("Counts classes correctly", func() {
			totals := chiCountClasses(table)

			So(totals["c1"], ShouldEqual, 27)
			So(totals["c2"], ShouldEqual, 12)
			So(totals["c3"], ShouldEqual, 21)
		})

		Convey("Computes statistics correctly", func() {
			// Both statistics are compared with a tolerance of 0.01.
			So(chiComputeStatistic(table[5], table[6]), ShouldAlmostEqual, 1.89, 0.01)
			So(chiComputeStatistic(table[1], table[2]), ShouldAlmostEqual, 1.08, 0.01)
		})
	})
}
Example #25
0
// main walks through basic Instances usage: parsing a CSV, printing it,
// raising the precision of a float attribute, overwriting cells, and
// building a new set of instances from raw values.
func main() {

	// Read the instances from a CSV file.
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Printing instances gives a human-readable summary. According to the
	// original notes it starts with a line such as
	//     Instances with 150 row(s) and 5 attribute(s)
	// followed by the attributes
	//     FloatAttribute(Sepal length)
	//     FloatAttribute(Sepal width)
	//     FloatAttribute(Petal length)
	//     FloatAttribute(Petal width)
	//     CategoricalAttribute([Iris-setosa Iris-versicolor Iris-viriginica])
	// where the class attribute is marked with an asterisk (*), and then up
	// to 30 rows of values, e.g.
	// 	5.10 3.50 1.40 0.20 Iris-setosa
	// 	4.90 3.00 1.40 0.20 Iris-setosa
	fmt.Println(iris)

	// Raise the precision of the first attribute, which must be a
	// FloatAttribute for the assertion to succeed.
	firstAttr, isFloat := iris.GetAttr(0).(*base.FloatAttribute)
	if !isFloat {
		panic("Invalid cast")
	}
	firstAttr.Precision = 4
	// The first column is now printed with more precision.
	fmt.Println(iris)

	// Overwrite two cells of row 0 from their string representation.
	iris.SetAttrStr(0, 0, "1.00")
	iris.SetAttrStr(0, iris.ClassIndex, "Iris-unusual")
	fmt.Println(iris)

	// New instances can be created from scratch; the raw values are
	// supplied as float64.
	rawValues := []float64{1.0, 0.0}

	// Describe the two columns: a float and a categorical class.
	attrs := make([]base.Attribute, 2)
	attrs[0] = base.NewFloatAttribute()
	attrs[1] = new(base.CategoricalAttribute)
	attrs[0].SetName("Arbitrary Float Quantity")
	attrs[1].SetName("Class")
	// Insert a standard class
	attrs[1].GetSysValFromString("A")

	// Assemble the final instances set and print it.
	fromRaw := base.NewInstancesFromRaw(attrs, 1, rawValues)
	fmt.Println(fromRaw)
}
Example #26
0
// TestDBSCAN clusters dbscan.csv and compares the result with the reference
// labels in dbscan_labels.csv (one integer per line; the line index is used
// as the point index).
func TestDBSCAN(t *testing.T) {

	Convey("Loading some data and labels...", t, func() {

		inst, err := base.ParseCSVToInstances("dbscan.csv", false)
		So(err, ShouldBeNil)

		// Check the open error before registering the Close, so the defer
		// never runs against a file that failed to open.
		file, err := os.Open("dbscan_labels.csv")
		So(err, ShouldBeNil)
		defer file.Close()

		clusterMap := ClusterMap(make(map[int][]int))

		scanner := bufio.NewScanner(file)
		line := -1
		for scanner.Scan() {
			line++
			v, err := strconv.ParseInt(scanner.Text(), 10, 64)
			if err != nil {
				panic(err)
			}
			// Shift labels by one: -1 are noise in scikit-learn's DBSCAN,
			// so they become 0 and are skipped.
			c := int(v + 1)
			if c == 0 {
				continue
			}
			// append allocates the slice on first use of a cluster id.
			clusterMap[c] = append(clusterMap[c], line)
		}
		// A read error would otherwise silently truncate the reference labels.
		So(scanner.Err(), ShouldBeNil)

		Convey("Our DBSCAN implementation should match...", func() {
			p := DBSCANParameters{
				ClusterParameters{
					inst.AllAttributes(),
					pairwise.NewEuclidean(),
				},
				0.3,
				10,
			}
			m, err := DBSCAN(inst, p)
			// Verify the call succeeded before inspecting its result.
			So(err, ShouldBeNil)

			Convey("There should be nothing in the result that's smaller than MinPts", func() {

				for id := range m {
					So(len(m[id]), ShouldBeGreaterThanOrEqualTo, 10)
				}

			})
			eq, err := clusterMap.Equals(m)
			So(err, ShouldBeNil)
			So(eq, ShouldBeTrue)
		})
	})

}
Example #27
0
// TestRandomTreeClassificationWithoutDiscretisation splits iris_headers.csv
// with a 0.6 ratio and hands both parts to verifyTreeClassification.
func TestRandomTreeClassificationWithoutDiscretisation(t *testing.T) {
	Convey("Predictions on filtered data with a Random Tree", t, func() {
		iris, parseErr := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(parseErr, ShouldBeNil)

		train, test := base.InstancesTrainTestSplit(iris, 0.6)
		verifyTreeClassification(train, test)
	})
}
Example #28
0
// BenchmarkFit times Fit of an average perceptron on the training half of
// house-votes-84.csv. Setup failures abort the benchmark instead of letting
// it run against missing data.
func BenchmarkFit(b *testing.B) {

	a := NewAveragePerceptron(10, 1.2, 0.5, 0.3)
	absPath, err := filepath.Abs("../examples/datasets/house-votes-84.csv")
	if err != nil {
		b.Fatal(err)
	}
	rawData, err := base.ParseCSVToInstances(absPath, true)
	if err != nil {
		b.Fatal(err)
	}
	// Only the training part of the split is needed.
	trainData, _ := base.InstancesTrainTestSplit(rawData, 0.5)

	// Exclude loading and splitting from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Fit(trainData)
	}
}
Example #29
0
// TestBinning runs a BinningFilter (constructed with 10) over attribute 0 of
// iris_headers.csv and reports every row whose value differs from
// iris_binned.csv by 1 or more. A second, unfiltered copy of the original
// data is loaded only to be included in the failure output.
func TestBinning(testEnv *testing.T) {
	// Each parse error is checked on its own; checking only the last one
	// would hide a failure of the first two loads.
	inst1, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		testEnv.Fatal(err)
	}
	inst2, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
	if err != nil {
		testEnv.Fatal(err)
	}
	inst3, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		testEnv.Fatal(err)
	}

	filt := NewBinningFilter(inst1, 10)
	filt.AddAttribute(inst1.GetAttr(0))
	filt.Build()
	filt.Run(inst1)

	for i := 0; i < inst1.Rows; i++ {
		val1 := inst1.Get(i, 0)
		val2 := inst2.Get(i, 0)
		val3 := inst3.Get(i, 0)
		if math.Abs(val1-val2) >= 1 {
			testEnv.Error(val1, val2, val3, i)
		}
	}
}
Example #30
0
// TestID3 fits an ID3 decision tree (constructed with 0.0) on tennis.csv and
// verifies the split attributes and leaf classes of the resulting tree.
func TestID3(testEnv *testing.T) {

	// Import the "PlayTennis" dataset
	tennis, err := base.ParseCSVToInstances("../examples/datasets/tennis.csv", true)
	if err != nil {
		panic(err)
	}

	// Build the decision tree
	id3 := NewID3DecisionTree(0.0)
	id3.Fit(tennis)
	top := id3.Root

	// The root must split on "outlook".
	if name := top.SplitAttr.GetName(); name != "outlook" {
		testEnv.Error(top)
	}

	// Second level: sunny splits on humidity, rainy on windy, and overcast
	// has no split attribute.
	sunny := top.Children["sunny"]
	overcast := top.Children["overcast"]
	rainy := top.Children["rainy"]
	if name := sunny.SplitAttr.GetName(); name != "humidity" {
		testEnv.Error(sunny)
	}
	if name := rainy.SplitAttr.GetName(); name != "windy" {
		testEnv.Error(rainy)
	}
	if overcast.SplitAttr != nil {
		testEnv.Error(overcast)
	}

	// Leaves under "sunny".
	humidityHigh := sunny.Children["high"]
	humidityNormal := sunny.Children["normal"]
	if humidityHigh.Class != "no" {
		testEnv.Error(humidityHigh)
	}
	if humidityNormal.Class != "yes" {
		testEnv.Error(humidityNormal)
	}

	// Leaves under "rainy".
	notWindy := rainy.Children["false"]
	windy := rainy.Children["true"]
	if notWindy.Class != "yes" {
		testEnv.Error(notWindy)
	}
	if windy.Class != "no" {
		testEnv.Error(windy)
	}

	// "overcast" carries the class "yes".
	if overcast.Class != "yes" {
		testEnv.Error(overcast)
	}
}