Esempio n. 1
0
/*
ComputeContinu computes the Gini values of a continuous attribute A with
respect to the target attribute T, whose classes are listed in C.

A and T are copied first and every reordering below is applied to the copies.
The index returned by sorting the attribute copy is kept in SortedIndex and is
then used to reorder the target copy, so both copies follow the same order.

The receiver is updated as follows: IsContinu is set to true, SortedIndex
holds the sort index, Index and Gain are allocated with one slot per entry of
ContinuPart, MinIndexValue is reset to 1.0, and Value holds the result of
compute on the reordered target. The partitions and the per-partition gain
are presumably filled in by createContinuPartition and computeContinuGain
(not visible here -- confirm against their definitions).
*/
func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) {
	gini.IsContinu = true

	// Private copies of the attribute and target values.
	attr := make([]float64, len(*A))
	copy(attr, *A)

	target := make([]string, len(*T))
	copy(target, *T)

	gini.SortedIndex = numerus.Floats64IndirectSort(attr, true)
	if DEBUG >= 1 {
		fmt.Println("[gini] attr sorted :", attr)
	}

	// Reorder the target values with the index obtained from the attribute.
	tekstus.StringsSortByIndex(&target, gini.SortedIndex)

	// Build the partitions from the attribute copy.
	gini.createContinuPartition(&attr)

	// One gini index and one gini gain slot per partition.
	nparts := len(gini.ContinuPart)
	gini.Index = make([]float64, nparts)
	gini.Gain = make([]float64, nparts)
	gini.MinIndexValue = 1.0

	// Gini value over all samples, followed by the gain computation.
	gini.Value = gini.compute(&target, C)
	gini.computeContinuGain(&attr, &target, C)
}
Esempio n. 2
0
// TestStringsSortByIndex checks that tekstus.StringsSortByIndex reorders a
// slice of strings according to a list of indices: with the data below, the
// expected element at position i is the input element at position order[i].
func TestStringsSortByIndex(t *testing.T) {
	input := []string{"Z", "X", "C", "V", "B", "N", "M"}
	order := []int{4, 2, 6, 5, 3, 1, 0}
	want := []string{"B", "C", "M", "N", "V", "X", "Z"}

	tekstus.StringsSortByIndex(&input, order)

	assert(t, want, input, true)
}
Esempio n. 3
0
//
// Performance computes the performance statistic of the classifier from the
// actual class labels of samples and the probabilities in probs, and returns
// the statistics stored in the runtime.
//
// Steps,
// (1) Sort probs in place (descending order), recording the reordering in an
// index list.
// (2) Reorder the actual labels and predicts with that index list.
// (3) Pass samples, the reordered labels, and the sorted probs to
// computePerfByProbs.
//
// NOTE(review): probs is sorted in place, so the caller's slice is reordered
// as well.
//
func (rt *Runtime) Performance(samples tabula.ClasetInterface,
	predicts []string, probs []float64,
) (
	perfs Stats,
) {
	labels := samples.GetClassAsStrings()
	nprobs := len(probs)

	// (1)
	order := numerus.IntCreateSeq(0, nprobs-1)
	numerus.Floats64InplaceMergesort(probs, order, 0, nprobs, false)

	// (2)
	tekstus.StringsSortByIndex(&labels, order)
	tekstus.StringsSortByIndex(&predicts, order)

	// (3)
	rt.computePerfByProbs(samples, labels, probs)

	return rt.perfs
}