// // ClassifySet given a samples predict their class by running each sample in // forest, adn return their class prediction with confusion matrix. // `samples` is the sample that will be predicted, `sampleIds` is the index of // samples. // If `sampleIds` is not nil, then sample index will be checked in each tree, // if the sample is used for training, their vote is not counted. // // Algorithm, // // (0) Get value space (possible class values in dataset) // (1) For each row in test-set, // (1.1) collect votes in all trees, // (1.2) select majority class vote, and // (1.3) compute and save the actual class probabilities. // (2) Compute confusion matrix from predictions. // (3) Compute stat from confusion matrix. // (4) Write the stat to file only if sampleIds is empty, which mean its run // not from OOB set. // func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface, sampleIds []int, ) ( predicts []string, cm *classifier.CM, probs []float64, ) { stat := classifier.Stat{} stat.Start() if len(sampleIds) <= 0 { fmt.Println(tag, "Classify set:", samples) fmt.Println(tag, "Classify set sample (one row):", samples.GetRow(0)) } // (0) vs := samples.GetClassValueSpace() actuals := samples.GetClassAsStrings() sampleIdx := -1 // (1) rows := samples.GetRows() for x, row := range *rows { // (1.1) if len(sampleIds) > 0 { sampleIdx = sampleIds[x] } votes := forest.Votes(row, sampleIdx) // (1.2) classProbs := tekstus.WordsProbabilitiesOf(votes, vs, false) _, idx, ok := numerus.Floats64FindMax(classProbs) if ok { predicts = append(predicts, vs[idx]) } // (1.3) probs = append(probs, classProbs[0]) } // (2) cm = forest.ComputeCM(sampleIds, vs, actuals, predicts) // (3) forest.ComputeStatFromCM(&stat, cm) stat.End() if len(sampleIds) <= 0 { fmt.Println(tag, "CM:", cm) fmt.Println(tag, "Classifying stat:", stat) _ = stat.Write(forest.StatFile) } return predicts, cm, probs }