Beispiel #1
0
// TestWordsCountsOf checks that tekstus.WordsCountTokens reports, for each
// requested token, how many times it occurs in the list of words.
func TestWordsCountsOf(t *testing.T) {
	var (
		// "A" occurs twice, "B" once; "C" is present but not requested.
		words  = []string{"A", "B", "A", "C"}
		tokens = []string{"A", "B"}
		want   = []int{2, 1}
	)

	counts := tekstus.WordsCountTokens(words, tokens, false)

	assert(t, want, counts, true)
}
Beispiel #2
0
//
// computePerfByProbs computes the classifier performance from the
// probabilities (or scores) in `probs` and appends the resulting statistics
// to `rt.perfs`.
//
// A new Stat is recorded every time the probability differs from the
// previous one, plus one final Stat after all probabilities have been
// consumed. The AUC stored in the intermediate stats is the running
// (un-normalised) trapezoid area; only the final stat stores the area divided
// by the product of the two class counts found in `actuals`.
//
// `actuals[i]` is the actual class of the sample that produced `probs[i]`.
// NOTE(review): presumably `probs` is already sorted by the caller -- confirm.
//
// This currently only works for two-class problems: the first value in the
// class value space is counted as positive, anything else as negative.
//
func (rt *Runtime) computePerfByProbs(samples tabula.ClasetInterface,
	actuals []string, probs []float64,
) {
	classes := samples.GetClassValueSpace()
	totals := numerus.IntsTo64(samples.Counts())
	counted := tekstus.WordsCountTokens(actuals, classes, false)

	var (
		tp, fp         int64
		lastTP, lastFP int64
		area           float64
	)
	lastProb := math.Inf(-1)

	// newStat creates a Stat filled with the rates and precision for the
	// current values of tp and fp. The AUC is left for the caller to set.
	newStat := func() *Stat {
		s := &Stat{}
		s.SetTPRate(tp, totals[0])
		s.SetFPRate(fp, totals[1])
		s.SetPrecisionFromRate(totals[0], totals[1])
		return s
	}

	for i, prob := range probs {
		if prob != lastProb {
			s := newStat()

			area += trapezoidArea(fp, lastFP, tp, lastTP)
			s.SetAUC(area)

			rt.perfs = append(rt.perfs, s)

			lastProb, lastTP, lastFP = prob, tp, fp
		}

		if actuals[i] != classes[0] {
			fp++
		} else {
			tp++
		}
	}

	// Final point: close the curve and normalise the accumulated area.
	last := newStat()

	area += trapezoidArea(fp, lastFP, tp, lastTP)
	area /= float64(counted[0] * counted[1])
	last.SetAUC(area)

	rt.perfs = append(rt.perfs, last)

	// Replace the first stat with the second one, because the precision
	// of the first stat is NaN.
	if len(rt.perfs) >= 2 {
		rt.perfs[0] = rt.perfs[1]
	}
}
Beispiel #3
0
/*
compute returns the Gini value for the attribute values in T, given the list
of class values C.

T and C are passed to tekstus.WordsCountTokens to obtain one count per class
in C; each count is divided by the number of values in T to get the
probability p of that class.

Return Gini value in the form of,

	1 - sum (p^2 of each class in T)

If T is empty the function returns 0.

When DEBUG is 3 or higher, the contribution of each class is printed to
standard output.
*/
func (gini *Gini) compute(T *[]string, C *[]string) float64 {
	n := float64(len(*T))
	if n == 0 {
		return 0
	}

	classCount := tekstus.WordsCountTokens(*T, *C, true)

	var sump2 float64

	for x, v := range classCount {
		// Convert once; the float value is used both for the probability
		// and for the debug output, whose verbs are all `%f`.
		count := float64(v)
		p := count / n
		sump2 += p * p

		if DEBUG >= 3 {
			fmt.Printf("[gini] compute (%s): (%f/%f)^2 = %f\n",
				(*C)[x], count, n, p*p)
		}
	}

	return 1 - sump2
}