예제 #1
0
// trainNextBaseEstimator fits one more base classifier on data_set using the
// trainer's current sample weights, and then re-weights the samples so that
// the ones this classifier got wrong count more in the next round.
//
// The step argument is not used by this method.
//
// It returns, in order:
//   - the estimator's vote weight b = ln((1-err)/err) + ln(ClassesNum-1),
//     or 1.0 when the weighted error is exactly zero;
//   - the freshly trained base estimator;
//   - err, the sum of the weights of the misclassified samples.
//
// Side effects: trainer.prediction is overwritten with this estimator's
// predictions; trainer.weights is rescaled and normalized to sum to 1.0;
// when options.EnableEmbeddedFeaturesRanking is set, the base trainer's
// features rank is added to trainer.EmbeddedFeaturesRank.
//
// If the rescaled weights do not add up to a positive finite number (for
// example every weight was clamped to zero, or b is infinite/NaN because err
// reached 1), trainer.weights is left untouched: dividing by such a sum would
// fill the weights with NaN.
func (trainer *AdaboostClassifierTrainer) trainNextBaseEstimator(data_set *mlearn.DataSet, step int) (
	float64, mlearn.BaseClassifier, float64) {
	base_estimator := trainer.baseModelTrainer.TrainClassifierWithWeights(data_set, trainer.weights)

	// embedded features ranking: accumulate the rank reported by the base trainer
	if trainer.options.EnableEmbeddedFeaturesRanking {
		// nothing in the loop touches the base trainer, so one call is enough
		features_rank := trainer.baseModelTrainer.GetFeaturesRank()
		for j := range trainer.EmbeddedFeaturesRank {
			trainer.EmbeddedFeaturesRank[j] += features_rank[j]
		}
	}

	// save the predictions for later use; sample is a scratch buffer reused for every row
	sample := make([]float64, data_set.FeaturesNum)
	for i := range trainer.prediction {
		data_set.GetSampleInplace(i, sample)
		trainer.prediction[i] = base_estimator.PredictProbe(sample)
	}

	// calculate the weighted prediction error
	var err float64
	for i := 0; i < data_set.SamplesNum; i++ {
		if trainer.prediction[i] != data_set.Classes[i] {
			err += trainer.weights[i]
		}
	}

	// check it to prevent math.Log domain violation (err == 0 means that there's nothing to boost)
	if err == 0 {
		return 1.0, base_estimator, err
	}
	b := math.Log((1.0-err)/err) + math.Log(float64(data_set.ClassesNum)-1.0)

	// Rescaled weights below this threshold are flushed to zero to deal with rounding errors.
	const minimalWeight float64 = 10e-9

	// exp(b) is the same for every sample, so compute it once.
	scale := math.Exp(b)

	// boosted returns the not-yet-normalized new weight of sample i. Only
	// misclassified samples are rescaled; correctly classified ones keep
	// their weight and merely lose relative share during normalization.
	boosted := func(i int) float64 {
		w := trainer.weights[i]
		if trainer.prediction[i] != data_set.Classes[i] {
			w *= scale
			if w < minimalWeight {
				w = 0
			}
		}
		return w
	}

	// First pass: only sum the rescaled weights, without committing them, so
	// that a degenerate sum can be detected while the old weights still exist.
	var weights_sum float64
	for i := range trainer.weights {
		weights_sum += boosted(i)
	}

	// A zero, negative, NaN or infinite sum cannot be normalized; keep the
	// previous weights instead of turning them into NaN.
	if !(weights_sum > 0) || math.IsInf(weights_sum, 0) {
		return b, base_estimator, err
	}

	// Second pass: commit the rescaled weights, normalized to 1.0 sum.
	var normalized_sum float64
	for i := range trainer.weights {
		trainer.weights[i] = boosted(i) / weights_sum
		normalized_sum += trainer.weights[i]
	}
	// dirty hack to guarantee 1.0 weights sum which is important to base model (regardless of rounding errors)
	trainer.weights[0] += 1.0 - normalized_sum

	return b, base_estimator, err
}
예제 #2
0
// testEmbeddedFeaturesFiltering measures how the F1 score changes when the
// classifier is retrained on only the top-ranked features.
//
// It first trains an AdaBoost classifier over CART trees on train_dataset with
// embedded features ranking enabled, scores it on test_dataset and keeps the
// resulting F1 as the reference. Then, for every j from 10 up to (but not
// including) test_dataset.FeaturesNum, it retrains on the first j ranked
// features and records the F1 obtained on the equally reduced test data.
//
// It returns one result per j (a nil slice when FeaturesNum <= 10) together
// with the reference F1. Progress and scores are printed to standard output.
func testEmbeddedFeaturesFiltering(numBaseModels, maxTreeHeight int, train_dataset, test_dataset *mlearn.DataSet) (
	[]testEmbeddedFeaturesFilteringResult, float64) {

	fmt.Println("training adaboost classifier with embedded features filtering ...")
	rankingTreeOpts := adaboost.CARTClassifierTrainOptions{
		MaxDepth:                      maxTreeHeight,
		MinElementsInLeaf:             10,
		EnableEmbeddedFeaturesRanking: true,
	}
	rankingBoostOpts := adaboost.AdaboostClassifierTrainOptions{
		MaxEstimators:                 numBaseModels,
		EnableEmbeddedFeaturesRanking: true,
	}
	refBooster := adaboost.NewAdaboostClassifierTrainer(
		adaboost.NewCARTClassifierTrainer(train_dataset, rankingTreeOpts))
	refModel := refBooster.TrainClassifier(train_dataset, rankingBoostOpts)
	featureOrder := refBooster.GetRankedFeatures()

	fmt.Println("predicting ...")
	refPredicted := make([]int, 0, len(test_dataset.Classes))
	for row := 0; row < cap(refPredicted); row++ {
		refPredicted = append(refPredicted, refModel.PredictProbe(test_dataset.GetSample(row)))
	}

	refPrecision, refRecall, refF1 := mlearn.PrecisionRecallF1(refPredicted, test_dataset.Classes, test_dataset.ClassesNum)
	fmt.Printf("reference precision: %.3v  recall: %.3v  f1: %.3v \n\n", refPrecision, refRecall, refF1)

	var results []testEmbeddedFeaturesFilteringResult
	for count := 10; count < test_dataset.FeaturesNum; count++ {
		fmt.Printf("now training again using only %v of selected features ...\n", count)
		kept := featureOrder[:count]
		fmt.Println("selected features are: ")
		fmt.Println(kept)

		// Work on shallow copies so SubsetFeatures is not called on the
		// caller's DataSet values themselves.
		// NOTE(review): whether SubsetFeatures mutates storage shared with the
		// originals cannot be seen from here - confirm.
		trainSubset, testSubset := *train_dataset, *test_dataset
		trainSubset.SubsetFeatures(kept)
		testSubset.SubsetFeatures(kept)

		fmt.Println()
		// The retrained model does not need features ranking, so it is left off.
		treeTrainer := adaboost.NewCARTClassifierTrainer(&trainSubset,
			adaboost.CARTClassifierTrainOptions{MaxDepth: maxTreeHeight, MinElementsInLeaf: 10})
		model := adaboost.NewAdaboostClassifierTrainer(treeTrainer).TrainClassifier(&trainSubset,
			adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels})

		fmt.Println("predicting ...")
		predicted := make([]int, 0, len(testSubset.Classes))
		for row := 0; row < cap(predicted); row++ {
			predicted = append(predicted, model.PredictProbe(testSubset.GetSample(row)))
		}

		// Scored against the labels of the full test set, as before.
		p, r, f1 := mlearn.PrecisionRecallF1(predicted, test_dataset.Classes, test_dataset.ClassesNum)
		fmt.Printf("precision: %.3v  recall: %.3v  f1: %.3v \n", p, r, f1)

		results = append(results, testEmbeddedFeaturesFilteringResult{FeaturesCount: count, F1: f1})
	}

	return results, refF1
}
예제 #3
0
// testCARTClassifier trains a single CART classifier on train_dataset
// (MaxDepth 0, MinElementsInLeaf 1), predicts every sample of test_dataset
// and prints the resulting precision, recall and F1 to standard output.
func testCARTClassifier(train_dataset, test_dataset *mlearn.DataSet) {
	fmt.Println("testing CART classifier itself ...")
	treeOpts := adaboost.CARTClassifierTrainOptions{MaxDepth: 0, MinElementsInLeaf: 1}
	model := adaboost.NewCARTClassifierTrainer(train_dataset, treeOpts).TrainClassifier(train_dataset)

	predicted := make([]int, 0, len(test_dataset.Classes))
	for row := 0; row < cap(predicted); row++ {
		predicted = append(predicted, model.PredictProbe(test_dataset.GetSample(row)))
	}

	// The original author's note: f1 == 1.0 is the expected outcome here.
	p, r, f1 := mlearn.PrecisionRecallF1(predicted, test_dataset.Classes, test_dataset.ClassesNum)
	fmt.Printf("precision: %.3v  recall: %.3v  f1: %.3v \n\n", p, r, f1)
}
예제 #4
0
// testAdaboostClassifier trains an AdaBoost classifier made of at most
// numBaseModels CART trees (configured with maxTreeHeight and minLeafItems)
// on train_dataset, predicts every sample of test_dataset and prints the
// resulting precision, recall and F1 to standard output.
func testAdaboostClassifier(numBaseModels, maxTreeHeight, minLeafItems int, train_dataset, test_dataset *mlearn.DataSet) {
	fmt.Println("training adaboost classifier over CART trees ...")
	treeOpts := adaboost.CARTClassifierTrainOptions{
		MaxDepth:          maxTreeHeight,
		MinElementsInLeaf: minLeafItems,
	}
	boostOpts := adaboost.AdaboostClassifierTrainOptions{MaxEstimators: numBaseModels}

	booster := adaboost.NewAdaboostClassifierTrainer(
		adaboost.NewCARTClassifierTrainer(train_dataset, treeOpts))
	model := booster.TrainClassifier(train_dataset, boostOpts)

	fmt.Println("predicting ...")
	predicted := make([]int, 0, len(test_dataset.Classes))
	for row := 0; row < cap(predicted); row++ {
		predicted = append(predicted, model.PredictProbe(test_dataset.GetSample(row)))
	}

	p, r, f1 := mlearn.PrecisionRecallF1(predicted, test_dataset.Classes, test_dataset.ClassesNum)
	fmt.Printf("\nprecision: %.3v  recall: %.3v  f1: %.3v \n", p, r, f1)
}