Exemplo n.º 1
0
//
// Init prepares the LNSmote runtime for a resampling run on dataset.
//
// It first delegates to the embedded Runtime's Init (presumably where input
// values are validated and defaulted -- confirm against that method), then
// derives the per-sample synthetic count from PercentOver, keeps a reference
// to the dataset rows, selects the minority-class rows using ClassIndex and
// ClassMinor, and starts with an empty outlier list.
//
func (in *Runtime) Init(dataset tabula.ClasetInterface) {
	in.Runtime.Init()

	// PercentOver is a percentage; NSynthetic is that value divided by 100.
	in.NSynthetic = in.PercentOver / 100.0
	in.datasetRows = dataset.GetDataAsRows()

	minority := tabula.SelectRowsWhere(dataset, in.ClassIndex, in.ClassMinor)
	in.minorset = minority

	in.outliers = make(tabula.Rows, 0)

	// Everything below is diagnostic output only.
	if DEBUG < 1 {
		return
	}

	fmt.Println("[lnsmote] n:", in.NSynthetic)
	fmt.Println("[lnsmote] n minority:", in.minorset.Len())
}
Exemplo n.º 2
0
//
// ClassifySetByWeight will classify each instance in samples by weight
// with respect to its single performance.
//
// Algorithm,
// (1) For each instance in samples,
// (1.0) reset the per-instance accumulators.
// (1.1) for each stage,
// (1.1.1) collect votes for instance in current stage.
// (1.1.2) Compute probabilities of each class in votes.
//
//		prob_class = count_of_class / total_votes
//
// (1.1.3) Compute total of probabilities times the stage weight.
//
//		stage_prob = prob_class * stage_weight
//
// (1.2) Divide each class stage probability with
//
//		stage_prob = stage_prob /
//			(sum_of_all_weights * number_of_tree_in_forest)
//
// (1.3) Select class label with highest probability.
// (1.4) Save stage probabilities for positive class.
// (2) Compute confusion matrix.
//
// Parameters: samples is the set to classify; sampleIds is passed through
// unchanged to ComputeCM.
//
// Returns: predicts holds the predicted class label per instance, cm is the
// confusion matrix computed from the predictions and the actual classes of
// samples, and probs holds, per instance, the unweighted probability of the
// first class in the value space averaged over all stages.
//
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	vs := samples.GetClassValueSpace()
	stageProbs := make([]float64, len(vs))
	stageSumProbs := make([]float64, len(vs))
	sumWeights := numerus.Floats64Sum(crf.weights)

	// Normaliser for step (1.2); it does not depend on the current row.
	stageWeight := sumWeights * float64(crf.NTree)

	// (1)
	rows := samples.GetDataAsRows()
	for _, row := range *rows {
		// (1.0) Both accumulators are reused across rows, so both must
		// be cleared; otherwise the weighted scores of earlier instances
		// would leak into the prediction of the current one.
		for y := range stageProbs {
			stageProbs[y] = 0
			stageSumProbs[y] = 0
		}

		// (1.1)
		for y, forest := range crf.forests {
			// (1.1.1)
			votes := forest.Votes(row, -1)

			// (1.1.2)
			voteProbs := tekstus.WordsProbabilitiesOf(votes, vs, false)

			// (1.1.3)
			for z := range voteProbs {
				stageSumProbs[z] += voteProbs[z]
				stageProbs[z] += voteProbs[z] * crf.weights[y]
			}
		}

		// (1.2)
		for x := range stageProbs {
			stageProbs[x] = stageProbs[x] / stageWeight
		}

		// (1.3)
		_, maxi, ok := numerus.Floats64FindMax(stageProbs)
		if ok {
			predicts = append(predicts, vs[maxi])
		}

		// (1.4) NOTE(review): index 0 is taken as the positive class;
		// this assumes the class value space lists it first -- confirm.
		probs = append(probs, stageSumProbs[0]/
			float64(len(crf.forests)))
	}

	// (2)
	actuals := samples.GetClassAsStrings()
	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)

	crf.ComputeStatFromCM(&stat, cm)
	stat.End()

	// Writing statistics is best-effort: this method has no error result,
	// so a failed write is deliberately ignored rather than aborting.
	_ = stat.Write(crf.StatFile)

	return predicts, cm, probs
}