func testEmbeddedFeaturesFiltering(numBaseModels, maxTreeHeight int, train_dataset, test_dataset *mlearn.DataSet) ( []testEmbeddedFeaturesFilteringResult, float64) { fmt.Println("training adaboost classifier with embedded features filtering ...") cartTrainer := adaboost.NewCARTClassifierTrainer(train_dataset, adaboost.CARTClassifierTrainOptions{MaxDepth: int(maxTreeHeight), MinElementsInLeaf: 10, EnableEmbeddedFeaturesRanking: true}) adaboostTrainer := adaboost.NewAdaboostClassifierTrainer(cartTrainer) classifier := adaboostTrainer.TrainClassifier(train_dataset, adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels, EnableEmbeddedFeaturesRanking: true}) ranked_features := adaboostTrainer.GetRankedFeatures() fmt.Println("predicting ...") predictions := make([]int, len(test_dataset.Classes)) for i := range predictions { predictions[i] = classifier.PredictProbe(test_dataset.GetSample(i)) } precision, recall, ref_f1 := mlearn.PrecisionRecallF1(predictions, test_dataset.Classes, test_dataset.ClassesNum) fmt.Printf("reference precision: %.3v recall: %.3v f1: %.3v \n\n", precision, recall, ref_f1) var test_results []testEmbeddedFeaturesFilteringResult for j := 10; j < test_dataset.FeaturesNum; j++ { fmt.Printf("now training again using only %v of selected features ...\n", j) selected_features := ranked_features[:j] fmt.Println("selected features are: ") fmt.Println(selected_features) train_data_subset := &mlearn.DataSet{} test_data_subset := &mlearn.DataSet{} *train_data_subset = *train_dataset *test_data_subset = *test_dataset train_data_subset.SubsetFeatures(selected_features) test_data_subset.SubsetFeatures(selected_features) fmt.Println() cartTrainer = adaboost.NewCARTClassifierTrainer(train_data_subset, adaboost.CARTClassifierTrainOptions{MaxDepth: int(maxTreeHeight), MinElementsInLeaf: 10}) adaboostTrainer = adaboost.NewAdaboostClassifierTrainer(cartTrainer) classifier = adaboostTrainer.TrainClassifier(train_data_subset, adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels}) fmt.Println("predicting ...") predictions = make([]int, len(test_data_subset.Classes)) for i := range predictions { predictions[i] = classifier.PredictProbe(test_data_subset.GetSample(i)) } precision, recall, f1 := mlearn.PrecisionRecallF1(predictions, test_dataset.Classes, test_dataset.ClassesNum) fmt.Printf("precision: %.3v recall: %.3v f1: %.3v \n", precision, recall, f1) test_results = append(test_results, testEmbeddedFeaturesFilteringResult{FeaturesCount: j, F1: f1}) } return test_results, ref_f1 }
func testCARTClassifier(train_dataset, test_dataset *mlearn.DataSet) { fmt.Println("testing CART classifier itself ...") cartTrainer := adaboost.NewCARTClassifierTrainer(train_dataset, adaboost.CARTClassifierTrainOptions{MaxDepth: 0, MinElementsInLeaf: 1}) classifier := cartTrainer.TrainClassifier(train_dataset) predictions := make([]int, len(test_dataset.Classes)) for i := range predictions { predictions[i] = classifier.PredictProbe(test_dataset.GetSample(i)) } precision, recall, f1 := mlearn.PrecisionRecallF1(predictions, test_dataset.Classes, test_dataset.ClassesNum) // here we expect f1 == 1.0 fmt.Printf("precision: %.3v recall: %.3v f1: %.3v \n\n", precision, recall, f1) }
func testAdaboostClassifier(numBaseModels, maxTreeHeight, minLeafItems int, train_dataset, test_dataset *mlearn.DataSet) { fmt.Println("training adaboost classifier over CART trees ...") cartTrainer := adaboost.NewCARTClassifierTrainer(train_dataset, adaboost.CARTClassifierTrainOptions{MaxDepth: maxTreeHeight, MinElementsInLeaf: minLeafItems}) adaboostTrainer := adaboost.NewAdaboostClassifierTrainer(cartTrainer) classifier := adaboostTrainer.TrainClassifier(train_dataset, adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels}) fmt.Println("predicting ...") predictions := make([]int, len(test_dataset.Classes)) for i := range predictions { predictions[i] = classifier.PredictProbe(test_dataset.GetSample(i)) } precision, recall, f1 := mlearn.PrecisionRecallF1(predictions, test_dataset.Classes, test_dataset.ClassesNum) fmt.Printf("\nprecision: %.3v recall: %.3v f1: %.3v \n", precision, recall, f1) }