예제 #1
0
// testEmbeddedFeaturesFiltering trains a reference AdaBoost classifier over
// CART trees with embedded features ranking enabled, scores it on
// test_dataset, and then retrains and rescores the classifier using only the
// first j ranked features, for every j starting at 10 and staying below
// test_dataset.FeaturesNum.
//
// It returns one result (features count and F1) per retrained model together
// with the F1 of the reference model. Progress and metrics are printed to
// standard output.
func testEmbeddedFeaturesFiltering(numBaseModels, maxTreeHeight int, train_dataset, test_dataset *mlearn.DataSet) (
	[]testEmbeddedFeaturesFilteringResult, float64) {

	fmt.Println("training adaboost classifier with embedded features filtering ...")
	refCartTrainer := adaboost.NewCARTClassifierTrainer(train_dataset,
		adaboost.CARTClassifierTrainOptions{MaxDepth: maxTreeHeight, MinElementsInLeaf: 10, EnableEmbeddedFeaturesRanking: true})

	refTrainer := adaboost.NewAdaboostClassifierTrainer(refCartTrainer)
	refClassifier := refTrainer.TrainClassifier(train_dataset,
		adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels, EnableEmbeddedFeaturesRanking: true})
	rankedFeatures := refTrainer.GetRankedFeatures()

	fmt.Println("predicting ...")
	refPredictions := make([]int, len(test_dataset.Classes))
	for i := range refPredictions {
		refPredictions[i] = refClassifier.PredictProbe(test_dataset.GetSample(i))
	}

	refPrecision, refRecall, refF1 := mlearn.PrecisionRecallF1(refPredictions, test_dataset.Classes, test_dataset.ClassesNum)
	fmt.Printf("reference precision: %.3v  recall: %.3v  f1: %.3v \n\n", refPrecision, refRecall, refF1)

	var results []testEmbeddedFeaturesFilteringResult
	// The second condition keeps rankedFeatures[:j] in range: slicing past the
	// end of the ranking would panic if the trainer ranked fewer features than
	// the dataset has.
	for j := 10; j < test_dataset.FeaturesNum && j <= len(rankedFeatures); j++ {
		fmt.Printf("now training again using only %v of selected features ...\n", j)
		selectedFeatures := rankedFeatures[:j]
		fmt.Println("selected features are: ")
		fmt.Println(selectedFeatures)

		// Copy the DataSet structs by value so SubsetFeatures is applied to
		// separate DataSet values rather than to the caller's ones.
		// NOTE(review): this is a shallow copy; any slice fields still share
		// backing storage with the originals — confirm SubsetFeatures does
		// not modify them in place.
		trainSubset := &mlearn.DataSet{}
		testSubset := &mlearn.DataSet{}
		*trainSubset = *train_dataset
		*testSubset = *test_dataset
		trainSubset.SubsetFeatures(selectedFeatures)
		testSubset.SubsetFeatures(selectedFeatures)

		fmt.Println()
		// Embedded features ranking is left disabled here: the retrained
		// model is only scored, its ranking is never read.
		cartTrainer := adaboost.NewCARTClassifierTrainer(trainSubset,
			adaboost.CARTClassifierTrainOptions{MaxDepth: maxTreeHeight, MinElementsInLeaf: 10})

		trainer := adaboost.NewAdaboostClassifierTrainer(cartTrainer)
		classifier := trainer.TrainClassifier(trainSubset,
			adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels})

		fmt.Println("predicting ...")
		predictions := make([]int, len(testSubset.Classes))
		for i := range predictions {
			predictions[i] = classifier.PredictProbe(testSubset.GetSample(i))
		}

		// Scored against the labels of the original test dataset.
		precision, recall, f1 := mlearn.PrecisionRecallF1(predictions, test_dataset.Classes, test_dataset.ClassesNum)
		fmt.Printf("precision: %.3v  recall: %.3v  f1: %.3v \n", precision, recall, f1)

		results = append(results, testEmbeddedFeaturesFilteringResult{FeaturesCount: j, F1: f1})
	}

	return results, refF1
}
예제 #2
0
// testAdaboostClassifier trains an AdaBoost classifier whose base models are
// CART trees on train_dataset, predicts a class for every sample of
// test_dataset, and prints the resulting precision, recall and F1 to standard
// output.
//
// numBaseModels is passed as the maximum number of estimators; maxTreeHeight
// and minLeafItems are passed as the tree depth limit and the minimum number
// of elements in a leaf.
func testAdaboostClassifier(numBaseModels, maxTreeHeight, minLeafItems int, train_dataset, test_dataset *mlearn.DataSet) {
	treeOpts := adaboost.CARTClassifierTrainOptions{
		MaxDepth:          maxTreeHeight,
		MinElementsInLeaf: minLeafItems,
	}
	boostOpts := adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels}

	fmt.Println("training adaboost classifier over CART trees ...")
	baseTrainer := adaboost.NewCARTClassifierTrainer(train_dataset, treeOpts)
	boostTrainer := adaboost.NewAdaboostClassifierTrainer(baseTrainer)
	model := boostTrainer.TrainClassifier(train_dataset, boostOpts)

	fmt.Println("predicting ...")
	sampleCount := len(test_dataset.Classes)
	predicted := make([]int, sampleCount)
	for idx := 0; idx < sampleCount; idx++ {
		sample := test_dataset.GetSample(idx)
		predicted[idx] = model.PredictProbe(sample)
	}

	p, r, score := mlearn.PrecisionRecallF1(predicted, test_dataset.Classes, test_dataset.ClassesNum)
	fmt.Printf("\nprecision: %.3v  recall: %.3v  f1: %.3v \n", p, r, score)
}