// // Init will initialize LNSmote runtime by checking input values and set it to // default if not set or invalid. // func (in *Runtime) Init(dataset tabula.ClasetInterface) { in.Runtime.Init() in.NSynthetic = in.PercentOver / 100.0 in.datasetRows = dataset.GetDataAsRows() in.minorset = tabula.SelectRowsWhere(dataset, in.ClassIndex, in.ClassMinor) in.outliers = make(tabula.Rows, 0) if DEBUG >= 1 { fmt.Println("[lnsmote] n:", in.NSynthetic) fmt.Println("[lnsmote] n minority:", in.minorset.Len()) } }
// // ClassifySetByWeight will classify each instance in samples by weight // with respect to its single performance. // // Algorithm, // (1) For each instance in samples, // (1.1) for each stage, // (1.1.1) collect votes for instance in current stage. // (1.1.2) Compute probabilities of each classes in votes. // // prob_class = count_of_class / total_votes // // (1.1.3) Compute total of probabilites times of stage weight. // // stage_prob = prob_class * stage_weight // // (1.2) Divide each class stage probabilites with // // stage_prob = stage_prob / // (sum_of_all_weights * number_of_tree_in_forest) // // (1.3) Select class label with highest probabilites. // (1.4) Save stage probabilities for positive class. // (2) Compute confusion matrix. // func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface, sampleIds []int, ) ( predicts []string, cm *classifier.CM, probs []float64, ) { stat := classifier.Stat{} stat.Start() vs := samples.GetClassValueSpace() stageProbs := make([]float64, len(vs)) stageSumProbs := make([]float64, len(vs)) sumWeights := numerus.Floats64Sum(crf.weights) // (1) rows := samples.GetDataAsRows() for _, row := range *rows { for y := range stageSumProbs { stageSumProbs[y] = 0 } // (1.1) for y, forest := range crf.forests { // (1.1.1) votes := forest.Votes(row, -1) // (1.1.2) probs := tekstus.WordsProbabilitiesOf(votes, vs, false) // (1.1.3) for z := range probs { stageSumProbs[z] += probs[z] stageProbs[z] += probs[z] * crf.weights[y] } } // (1.2) stageWeight := sumWeights * float64(crf.NTree) for x := range stageProbs { stageProbs[x] = stageProbs[x] / stageWeight } // (1.3) _, maxi, ok := numerus.Floats64FindMax(stageProbs) if ok { predicts = append(predicts, vs[maxi]) } probs = append(probs, stageSumProbs[0]/ float64(len(crf.forests))) } // (2) actuals := samples.GetClassAsStrings() cm = crf.ComputeCM(sampleIds, vs, actuals, predicts) crf.ComputeStatFromCM(&stat, cm) stat.End() _ = stat.Write(crf.StatFile) return predicts, cm, probs }