Exemplo n.º 1
0
func main() {

	var tree base.Classifier

	rand.Seed(time.Now().UTC().UnixNano())

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge
	filt := filters.NewChiMergeFilter(iris, 0.99)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(iris)

	// Create a 60-40 training-test split
	insts := base.InstancesTrainTestSplit(iris, 0.60)

	//
	// First up, use ID3
	//
	tree = trees.NewID3DecisionTree(0.6)
	// (Parameter controls train-prune split.)

	// Train the ID3 tree
	tree.Fit(insts[0])

	// Generate predictions
	predictions := tree.Predict(insts[1])

	// Evaluate
	fmt.Println("ID3 Performance")
	cf := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Next up, Random Trees
	//

	// Consider two randomly-chosen attributes
	tree = trees.NewRandomTree(2)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomTree Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))

	//
	// Finally, Random Forests
	//
	tree = ensemble.NewRandomForest(100, 3)
	tree.Fit(insts[0])
	predictions = tree.Predict(insts[1])
	fmt.Println("RandomForest Performance")
	cf = eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(eval.GetSummary(cf))
}
Exemplo n.º 2
0
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	rand.Seed(time.Now().UnixNano())
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	for _, a := range base.NonClassFloatAttributes(inst) {
		filt.AddAttribute(a)
	}
	filt.Train()
	trainDataf := base.NewLazilyFilteredInstances(trainData, filt)
	testDataf := base.NewLazilyFilteredInstances(testData, filt)
	rf := new(BaggedModel)
	for i := 0; i < 10; i++ {
		rf.AddModel(trees.NewRandomTree(2))
	}
	rf.Fit(trainDataf)
	fmt.Println(rf)
	predictions := rf.Predict(testDataf)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testDataf, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 3
0
func main() {
	// Load in a dataset, with headers. Header attributes will be stored.
	// Think of instances as a Data Frame structure in R or Pandas.
	// You can also create instances from scratch.
	rawData, err := base.ParseCSVToInstances("datasets/iris.csv", false)
	if err != nil {
		panic(err)
	}

	// Print a pleasant summary of your data.
	fmt.Println(rawData)

	//Initialises a new KNN classifier
	cls := knn.NewKnnClassifier("euclidean", 2)

	//Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	cls.Fit(trainData)

	//Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(testData)
	fmt.Println(predictions)

	// Prints precision/recall metrics
	confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))
}
Exemplo n.º 4
0
func TestPruning(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	fmt.Println(testData)
	filt.Run(testData)
	filt.Run(trainData)
	root := NewRandomTree(2)
	fittrainData, fittestData := base.InstancesTrainTestSplit(trainData, 0.6)
	root.Fit(fittrainData)
	root.Prune(fittestData)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 5
0
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	rand.Seed(time.Now().UnixNano())
	insts := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(insts[1])
	filt.Run(insts[0])
	rf := new(BaggedModel)
	for i := 0; i < 10; i++ {
		rf.AddModel(trees.NewRandomTree(2))
	}
	rf.Fit(insts[0])
	fmt.Println(rf)
	predictions := rf.Predict(insts[1])
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 6
0
func TestPredict(t *testing.T) {

	a := NewAveragePerceptron(10, 1.2, 0.5, 0.3)

	if a == nil {

		t.Errorf("Unable to create average perceptron")
	}

	absPath, _ := filepath.Abs("../examples/datasets/house-votes-84.csv")
	rawData, err := base.ParseCSVToInstances(absPath, true)
	if err != nil {
		t.Fail()
	}

	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.5)
	a.Fit(trainData)

	if a.trained == false {
		t.Errorf("Perceptron was not trained")
	}

	predictions := a.Predict(testData)
	cf, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		t.Errorf("Couldn't get confusion matrix: %s", err)
		t.Fail()
	}
	fmt.Println(evaluation.GetSummary(cf))
	fmt.Println(trainData)
	fmt.Println(testData)
	if evaluation.GetAccuracy(cf) < 0.65 {
		t.Errorf("Perceptron not trained correctly")
	}
}
Exemplo n.º 7
0
func BenchmarkKNNWithNoOpts(b *testing.B) {
	// Load
	train, test := readMnist()
	cls := NewKnnClassifier("euclidean", 1)
	cls.AllowOptimisations = false
	cls.Fit(train)
	predictions := cls.Predict(test)
	c, err := evaluation.GetConfusionMatrix(test, predictions)
	if err != nil {
		panic(err)
	}
	fmt.Println(evaluation.GetSummary(c))
	fmt.Println(evaluation.GetAccuracy(c))
}
Exemplo n.º 8
0
func TestMultiSVMUnweighted(t *testing.T) {
	Convey("Loading data...", t, func() {
		inst, err := base.ParseCSVToInstances("../examples/datasets/articles.csv", false)
		So(err, ShouldBeNil)
		X, Y := base.InstancesTrainTestSplit(inst, 0.4)

		m := NewMultiLinearSVC("l1", "l2", true, 1.0, 1e-4, nil)
		m.Fit(X)

		Convey("Predictions should work...", func() {
			predictions, err := m.Predict(Y)
			cf, err := evaluation.GetConfusionMatrix(Y, predictions)
			So(err, ShouldEqual, nil)
			So(evaluation.GetAccuracy(cf), ShouldBeGreaterThan, 0.70)
		})
	})
}
func main() {
	data, err := base.ParseCSVToInstances("iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	cls := knn.NewKnnClassifier("euclidean", 2)

	trainData, testData := base.InstancesTrainTestSplit(data, 0.8)
	cls.Fit(trainData)

	predictions := cls.Predict(testData)
	fmt.Println(predictions)

	confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
Exemplo n.º 10
0
func NewTestTrial(filename string, split float64) bool {
	cls := knn.NewKnnClassifier("euclidean", 2)
	data := CSVtoKNNData(filename)
	train, test := base.InstancesTrainTestSplit(data, split)

	cls.Fit(train)
	//Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(test)
	fmt.Println(predictions)

	confusionMat, err := evaluation.GetConfusionMatrix(test, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))

	return true
}
Exemplo n.º 11
0
func TestRandomForest(t *testing.T) {
	Convey("Given a valid CSV file", t, func() {
		inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		Convey("When Chi-Merge filtering the data", func() {
			filt := filters.NewChiMergeFilter(inst, 0.90)
			for _, a := range base.NonClassFloatAttributes(inst) {
				filt.AddAttribute(a)
			}
			filt.Train()
			instf := base.NewLazilyFilteredInstances(inst, filt)

			Convey("Splitting the data into test and training sets", func() {
				trainData, testData := base.InstancesTrainTestSplit(instf, 0.60)

				Convey("Fitting and predicting with a Random Forest", func() {
					rf := NewRandomForest(10, 3)
					err = rf.Fit(trainData)
					So(err, ShouldBeNil)

					predictions, err := rf.Predict(testData)
					So(err, ShouldBeNil)

					confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions should be somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMat), ShouldBeGreaterThan, 0.35)
					})
				})
			})
		})

		Convey("Fitting with a Random Forest with too many features compared to the data", func() {
			rf := NewRandomForest(10, len(base.NonClassAttributes(inst))+1)
			err = rf.Fit(inst)

			Convey("Should return an error", func() {
				So(err, ShouldNotBeNil)
			})
		})
	})
}
Exemplo n.º 12
0
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.60)
	filt := filters.NewChiMergeFilter(trainData, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(testData)
	filt.Run(trainData)
	rf := NewRandomForest(10, 3)
	rf.Fit(trainData)
	predictions := rf.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 13
0
func main() {
	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	//Initialises a new KNN classifier
	cls := knn.NewKnnClassifier("euclidean", 2)

	//Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	cls.Fit(trainData)

	//Calculates the Euclidean distance and returns the most popular label
	predictions := cls.Predict(testData)
	fmt.Println(predictions)

	// Prints precision/recall metrics
	confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
Exemplo n.º 14
0
func TestMultiSVMWeighted(t *testing.T) {
	Convey("Loading data...", t, func() {
		weights := make(map[string]float64)
		weights["Finance"] = 0.1739
		weights["Tech"] = 0.0750
		weights["Politics"] = 0.4928

		inst, err := base.ParseCSVToInstances("../examples/datasets/articles.csv", false)
		So(err, ShouldBeNil)
		X, Y := base.InstancesTrainTestSplit(inst, 0.4)

		m := NewMultiLinearSVC("l1", "l2", true, 0.62, 1e-4, weights)
		m.Fit(X)

		Convey("Predictions should work...", func() {
			predictions, err := m.Predict(Y)
			cf, err := evaluation.GetConfusionMatrix(Y, predictions)
			So(err, ShouldEqual, nil)
			So(evaluation.GetAccuracy(cf), ShouldBeGreaterThan, 0.70)
		})
	})
}
Exemplo n.º 15
0
func main() {
	// Load and parse the data from csv files
	fmt.Println("Loading data...")
	trainData, err := base.ParseCSVToInstances("data/mnist_train.csv", true)
	if err != nil {
		panic(err)
	}
	testData, err := base.ParseCSVToInstances("data/mnist_test.csv", true)
	if err != nil {
		panic(err)
	}

	// Create a new linear SVC with some good default values
	classifier, err := linear_models.NewLinearSVC("l1", "l2", true, 1.0, 1e-4)
	if err != nil {
		panic(err)
	}

	// Don't output information on each iteration
	base.Silent()

	// Train the linear SVC
	fmt.Println("Training...")
	classifier.Fit(trainData)

	// Make predictions for the test data
	fmt.Println("Predicting...")
	predictions, err := classifier.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Get a confusion matrix and print out some accuracy stats for our predictions
	confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(confusionMat))
}
Exemplo n.º 16
0
func TestOneVsAllModel(t *testing.T) {

	classifierFunc := func(c string) base.Classifier {
		m, err := linear_models.NewLinearSVC("l1", "l2", true, 1.0, 1e-4)
		if err != nil {
			panic(err)
		}
		return m
	}

	Convey("Given data", t, func() {
		inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		X, Y := base.InstancesTrainTestSplit(inst, 0.4)

		m := NewOneVsAllModel(classifierFunc)
		m.Fit(X)

		Convey("The maximum class index should be 2", func() {
			So(m.maxClassVal, ShouldEqual, 2)
		})

		Convey("There should be three of everything...", func() {
			So(len(m.filters), ShouldEqual, 3)
			So(len(m.classifiers), ShouldEqual, 3)
		})

		Convey("Predictions should work...", func() {
			predictions, err := m.Predict(Y)
			So(err, ShouldEqual, nil)
			cf, err := evaluation.GetConfusionMatrix(Y, predictions)
			So(err, ShouldEqual, nil)
			fmt.Println(evaluation.GetAccuracy(cf))
			fmt.Println(evaluation.GetSummary(cf))
		})
	})
}
Exemplo n.º 17
0
func TestBaggedModelRandomForest(t *testing.T) {
	Convey("Given data", t, func() {
		inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		Convey("Splitting the data into training and test data", func() {
			trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)

			Convey("Filtering the split datasets", func() {
				rand.Seed(time.Now().UnixNano())
				filt := filters.NewChiMergeFilter(inst, 0.90)
				for _, a := range base.NonClassFloatAttributes(inst) {
					filt.AddAttribute(a)
				}
				filt.Train()
				trainDataf := base.NewLazilyFilteredInstances(trainData, filt)
				testDataf := base.NewLazilyFilteredInstances(testData, filt)

				Convey("Fitting and Predicting with a Bagged Model of 10 Random Trees", func() {
					rf := new(BaggedModel)
					for i := 0; i < 10; i++ {
						rf.AddModel(trees.NewRandomTree(2))
					}

					rf.Fit(trainDataf)
					predictions := rf.Predict(testDataf)

					confusionMat, err := evaluation.GetConfusionMatrix(testDataf, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions are somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMat), ShouldBeGreaterThan, 0.5)
					})
				})
			})
		})
	})
}
Exemplo n.º 18
0
func TestRandomForest1(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	filt := filters.NewChiMergeFilter(inst, 0.90)
	for _, a := range base.NonClassFloatAttributes(inst) {
		filt.AddAttribute(a)
	}
	filt.Train()
	instf := base.NewLazilyFilteredInstances(inst, filt)

	trainData, testData := base.InstancesTrainTestSplit(instf, 0.60)

	rf := NewRandomForest(10, 3)
	rf.Fit(trainData)
	predictions := rf.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 19
0
func TestID3Classification(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	filt := filters.NewBinningFilter(inst, 10)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(inst)
	fmt.Println(inst)
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.70)
	// Build the decision tree
	rule := new(InformationGainRuleGenerator)
	root := InferID3Tree(trainData, rule)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 20
0
func TestRandomTreeClassification2(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	insts := base.InstancesTrainTestSplit(inst, 0.4)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	fmt.Println(insts[1])
	filt.Run(insts[1])
	filt.Run(insts[0])
	root := NewRandomTree(2)
	root.Fit(insts[0])
	fmt.Println(root)
	predictions := root.Predict(insts[1])
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(insts[1], predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 21
0
func main() {

	rand.Seed(4402201)

	rawData, err := base.ParseCSVToInstances("../datasets/house-votes-84.csv", true)
	if err != nil {
		panic(err)
	}

	//Initialises a new AveragePerceptron classifier
	cls := perceptron.NewAveragePerceptron(10, 1.2, 0.5, 0.3)

	//Do a training-test split
	trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
	fmt.Println(trainData)
	fmt.Println(testData)
	cls.Fit(trainData)

	predictions := cls.Predict(testData)

	// Prints precision/recall metrics
	confusionMat, _ := evaluation.GetConfusionMatrix(testData, predictions)
	fmt.Println(evaluation.GetSummary(confusionMat))
}
Exemplo n.º 22
0
func TestRandomTreeClassification(testEnv *testing.T) {
	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(inst, 0.6)
	filt := filters.NewChiMergeFilter(inst, 0.90)
	filt.AddAllNumericAttributes()
	filt.Build()
	filt.Run(trainData)
	filt.Run(testData)
	fmt.Println(inst)
	r := new(RandomTreeRuleGenerator)
	r.Attributes = 2
	root := InferID3Tree(trainData, r)
	fmt.Println(root)
	predictions := root.Predict(testData)
	fmt.Println(predictions)
	confusionMat := eval.GetConfusionMatrix(testData, predictions)
	fmt.Println(confusionMat)
	fmt.Println(eval.GetMacroPrecision(confusionMat))
	fmt.Println(eval.GetMacroRecall(confusionMat))
	fmt.Println(eval.GetSummary(confusionMat))
}
Exemplo n.º 23
0
func TestRandomTreeClassification(t *testing.T) {
	Convey("Predictions on filtered data with a Random Tree", t, func() {
		instances, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
		So(err, ShouldBeNil)

		trainData, testData := base.InstancesTrainTestSplit(instances, 0.6)

		filter := filters.NewChiMergeFilter(instances, 0.9)
		for _, a := range base.NonClassFloatAttributes(instances) {
			filter.AddAttribute(a)
		}
		filter.Train()
		filteredTrainData := base.NewLazilyFilteredInstances(trainData, filter)
		filteredTestData := base.NewLazilyFilteredInstances(testData, filter)

		Convey("Using InferID3Tree to create the tree and do the fitting", func() {
			Convey("Using a RandomTreeRule", func() {
				randomTreeRuleGenerator := new(RandomTreeRuleGenerator)
				randomTreeRuleGenerator.Attributes = 2
				root := InferID3Tree(filteredTrainData, randomTreeRuleGenerator)

				Convey("Predicting with the tree", func() {
					predictions, err := root.Predict(filteredTestData)
					So(err, ShouldBeNil)

					confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions should be somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
					})
				})
			})

			Convey("Using a InformationGainRule", func() {
				informationGainRuleGenerator := new(InformationGainRuleGenerator)
				root := InferID3Tree(filteredTrainData, informationGainRuleGenerator)

				Convey("Predicting with the tree", func() {
					predictions, err := root.Predict(filteredTestData)
					So(err, ShouldBeNil)

					confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions should be somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
					})
				})
			})
		})

		Convey("Using NewRandomTree to create the tree", func() {
			root := NewRandomTree(2)

			Convey("Fitting with the tree", func() {
				err = root.Fit(filteredTrainData)
				So(err, ShouldBeNil)

				Convey("Predicting with the tree, *without* pruning first", func() {
					predictions, err := root.Predict(filteredTestData)
					So(err, ShouldBeNil)

					confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions should be somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
					})
				})

				Convey("Predicting with the tree, pruning first", func() {
					root.Prune(filteredTestData)

					predictions, err := root.Predict(filteredTestData)
					So(err, ShouldBeNil)

					confusionMatrix, err := evaluation.GetConfusionMatrix(filteredTestData, predictions)
					So(err, ShouldBeNil)

					Convey("Predictions should be somewhat accurate", func() {
						So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.4)
					})
				})
			})
		})
	})
}
Exemplo n.º 24
0
func verifyTreeClassification(trainData, testData base.FixedDataGrid) {
	rand.Seed(44414515)
	Convey("Using InferID3Tree to create the tree and do the fitting", func() {
		Convey("Using a RandomTreeRule", func() {
			randomTreeRuleGenerator := new(RandomTreeRuleGenerator)
			randomTreeRuleGenerator.Attributes = 2
			root := InferID3Tree(trainData, randomTreeRuleGenerator)

			Convey("Predicting with the tree", func() {
				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
				})
			})
		})

		Convey("Using a InformationGainRule", func() {
			informationGainRuleGenerator := new(InformationGainRuleGenerator)
			root := InferID3Tree(trainData, informationGainRuleGenerator)

			Convey("Predicting with the tree", func() {
				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
				})
			})
		})
		Convey("Using a GiniCoefficientRuleGenerator", func() {
			gRuleGen := new(GiniCoefficientRuleGenerator)
			root := InferID3Tree(trainData, gRuleGen)
			Convey("Predicting with the tree", func() {
				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
				})
			})
		})
		Convey("Using a InformationGainRatioRuleGenerator", func() {
			gRuleGen := new(InformationGainRatioRuleGenerator)
			root := InferID3Tree(trainData, gRuleGen)
			Convey("Predicting with the tree", func() {
				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
				})
			})
		})

	})

	Convey("Using NewRandomTree to create the tree", func() {
		root := NewRandomTree(2)

		Convey("Fitting with the tree", func() {
			err := root.Fit(trainData)
			So(err, ShouldBeNil)

			Convey("Predicting with the tree, *without* pruning first", func() {
				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.5)
				})
			})

			Convey("Predicting with the tree, pruning first", func() {
				root.Prune(testData)

				predictions, err := root.Predict(testData)
				So(err, ShouldBeNil)

				confusionMatrix, err := evaluation.GetConfusionMatrix(testData, predictions)
				So(err, ShouldBeNil)

				Convey("Predictions should be somewhat accurate", func() {
					So(evaluation.GetAccuracy(confusionMatrix), ShouldBeGreaterThan, 0.4)
				})
			})
		})
	})
}
Exemplo n.º 25
0
// computeAccuracy is a helper method for Prune()
func computeAccuracy(predictions base.FixedDataGrid, from base.FixedDataGrid) float64 {
	cf, _ := evaluation.GetConfusionMatrix(from, predictions)
	return evaluation.GetAccuracy(cf)
}
Exemplo n.º 26
0
func main() {

	var tree base.Classifier

	rand.Seed(44111342)

	// Load in the iris dataset
	iris, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Discretise the iris dataset with Chi-Merge
	filt := filters.NewChiMergeFilter(iris, 0.999)
	for _, a := range base.NonClassFloatAttributes(iris) {
		filt.AddAttribute(a)
	}
	filt.Train()
	irisf := base.NewLazilyFilteredInstances(iris, filt)

	// Create a 60-40 training-test split
	trainData, testData := base.InstancesTrainTestSplit(irisf, 0.60)

	//
	// First up, use ID3
	//
	tree = trees.NewID3DecisionTree(0.6)
	// (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err := tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (information gain)")
	cf, err := evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.InformationGainRatioRuleGenerator))
	// (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (information gain ratio)")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	tree = trees.NewID3DecisionTreeFromRule(0.6, new(trees.GiniCoefficientRuleGenerator))
	// (Parameter controls train-prune split.)

	// Train the ID3 tree
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}

	// Generate predictions
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}

	// Evaluate
	fmt.Println("ID3 Performance (gini index generator)")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))
	//
	// Next up, Random Trees
	//

	// Consider two randomly-chosen attributes
	tree = trees.NewRandomTree(2)
	err = tree.Fit(testData)
	if err != nil {
		panic(err)
	}
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}
	fmt.Println("RandomTree Performance")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))

	//
	// Finally, Random Forests
	//
	tree = ensemble.NewRandomForest(70, 3)
	err = tree.Fit(trainData)
	if err != nil {
		panic(err)
	}
	predictions, err = tree.Predict(testData)
	if err != nil {
		panic(err)
	}
	fmt.Println("RandomForest Performance")
	cf, err = evaluation.GetConfusionMatrix(testData, predictions)
	if err != nil {
		panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
	}
	fmt.Println(evaluation.GetSummary(cf))
}
Exemplo n.º 27
0
// computeAccuracy is a helper method for Prune()
func computeAccuracy(predictions *base.Instances, from *base.Instances) float64 {
	cf := eval.GetConfusionMatrix(from, predictions)
	return eval.GetAccuracy(cf)
}