Exemple #1
0
//
// computePerfByProbs will compute classifier performance using probabilities
// or score `probs`.
//
// This currently only work for two class problem.
//
func (rt *Runtime) computePerfByProbs(samples tabula.ClasetInterface,
	actuals []string, probs []float64,
) {
	vs := samples.GetClassValueSpace()
	nactuals := numerus.IntsTo64(samples.Counts())
	nclass := tekstus.WordsCountTokens(actuals, vs, false)

	pprev := math.Inf(-1)
	tp := int64(0)
	fp := int64(0)
	tpprev := int64(0)
	fpprev := int64(0)

	auc := float64(0)

	for x, p := range probs {
		if p != pprev {
			stat := Stat{}
			stat.SetTPRate(tp, nactuals[0])
			stat.SetFPRate(fp, nactuals[1])
			stat.SetPrecisionFromRate(nactuals[0], nactuals[1])

			auc = auc + trapezoidArea(fp, fpprev, tp, tpprev)
			stat.SetAUC(auc)

			rt.perfs = append(rt.perfs, &stat)

			pprev = p
			tpprev = tp
			fpprev = fp
		}

		if actuals[x] == vs[0] {
			tp++
		} else {
			fp++
		}
	}

	stat := Stat{}
	stat.SetTPRate(tp, nactuals[0])
	stat.SetFPRate(fp, nactuals[1])
	stat.SetPrecisionFromRate(nactuals[0], nactuals[1])

	auc = auc + trapezoidArea(fp, fpprev, tp, tpprev)
	auc = auc / float64(nclass[0]*nclass[1])
	stat.SetAUC(auc)

	rt.perfs = append(rt.perfs, &stat)

	if len(rt.perfs) >= 2 {
		// Replace the first stat with second stat, because of NaN
		// value on the first precision.
		rt.perfs[0] = rt.perfs[1]
	}
}