func (ig *InfoGain) BuildEvaluator(instances data.Instances) { classIndex := instances.ClassIndex() numInstances := len(instances.Instances()) if ig.binarize { //binarize instances //implement NumericToBinary function ntb := NewNumericToBinary() ntb.Exec(instances) instances = ntb.Output() fmt.Println(instances.Instances()) } else { //discretize instances //implement Discretize function } numClasses := instances.Attribute(classIndex).NumValues() // Reserve space and initialize counters counts := make([][][]float64, len(instances.Attributes())) //initialize first dimension for k := range instances.Attributes() { //fmt.Println(k) if k != classIndex { numValues := len(instances.Attributes()[k].Values()) counts[k] = make([][]float64, numValues+1) //initialize second dimension for i := range counts[k] { counts[k][i] = make([]float64, numClasses+1) //initialize third dimension } } } // Initialize counters fmt.Println(numClasses, "numclasses") temp := make([]float64, numClasses+1) for k := 0; k < numInstances; k++ { inst := instances.Instance(k) if inst.ClassMissing(classIndex) { //check that class if the class is missing /*implement method to do that*/ temp[numClasses] += inst.Weight() } else { fmt.Println(int(inst.ClassValue(classIndex)), "classIndexes", inst.Weight(), "weights") temp[int(inst.ClassValue(classIndex))] += inst.Weight() //get the index of the value of the class } } fmt.Println(temp) for k := range counts { if k != classIndex { for i := range temp { counts[k][0][i] = temp[i] } } } // Get counts //inst.RealValues()[classIndex]) check this after finish, may contains errors, its have to be check if the classIndex exists if not return 0 /*see weka*/ //implement the necessary methods to make easier this implementation and not bugs friendly //New methods already implemented!!!!!!!! Later check it's functioning for k := 0; k < numInstances; k++ { inst := instances.Instance(k) for i := range inst.RealValues() { if inst.Index(i) != classIndex { if inst.IsMissingValue(i) || inst.ClassMissing(classIndex) { //if is missing the real value and the class if !inst.IsMissingValue(i) { counts[inst.Index(i)][int(inst.ValueSparse(i))][numClasses] += inst.Weight() counts[inst.Index(i)][0][numClasses] -= inst.Weight() } else if !inst.IsMissingValue(classIndex) { counts[inst.Index(i)][instances.Attribute(inst.Index(i)).NumValues()][int(inst.ClassValue(classIndex))] += inst.Weight() //tongue twister, now its not counts[inst.Index(i)][0][int(inst.ClassValue(classIndex))] -= inst.Weight() } else { counts[inst.Index(i)][instances.Attribute(inst.Index(i)).NumValues()][numClasses] += inst.Weight() counts[inst.Index(i)][0][numClasses] -= inst.Weight() } } else { counts[inst.Index(i)][int(inst.ValueSparse(i))][int(inst.ClassValue(classIndex))] += inst.Weight() counts[inst.Index(i)][0][int(inst.ClassValue(classIndex))] -= inst.Weight() } } } } // distribute missing counts if required if ig.missingMerge { for k := range instances.Attributes() { if k != classIndex { numValues := len(instances.Attributes()[k].Values()) // Compute marginals rowSums := make([]float64, numValues) columnSums := make([]float64, numClasses) sum := 0.0 for i := 0; i < numValues; i++ { for j := 0; j < numClasses; j++ { rowSums[i] += counts[k][i][j] columnSums[j] += counts[k][i][j] } sum += rowSums[i] } if utils.Gr(sum, 0) { additions := make([][]float64, numValues) //initializes slices for i := range additions { additions[i] = make([]float64, numClasses) } // Compute what needs to be added to each row for i := range additions { for j := range additions[i] { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j] } } // Compute what needs to be added to each column for i := 0; i < numClasses; i++ { for j := 0; j < numValues; j++ { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses] } } // Compute what needs to be added to each cell for i := 0; i < numClasses; i++ { for j := 0; j < numValues; j++ { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses] } } // Make new contingency table newTable := make([][]float64, numValues) //initializes slices for i := range newTable { newTable[i] = make([]float64, numClasses) } for i := range newTable { for j := range newTable[i] { newTable[i][j] = counts[k][i][j] + additions[i][j] } } counts[k] = newTable } } } } // Compute info gains ig.infoGains = make([]float64, len(instances.Attributes())) for i := range instances.Attributes() { if i != classIndex { ig.infoGains[i] = entropyOverColumns(counts[i]) - entropyConditionedOnRows(counts[i]) } } //fmt.Println(ig.infoGains, "infogain") }