Golang ClasetInterface.GetClassAsStrings Exemples

Langage de programmation: Golang

Espace de nommage/Package : github.com/shuLhan/tabula

Class/Type: ClasetInterface

Méthode/Fonction: GetClassAsStrings

Exemples sur hotexamples.com : 5

Golang ClasetInterface.GetClassAsStrings - 5 exemples trouvés. Ce sont les exemples réels les mieux notés de github.com/shuLhan/tabula.ClasetInterface.GetClassAsStrings extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

GetRow(6)

GetClassAsStrings(5)

GetClassValueSpace(4)

GetNColumn(4)

GetColumns(3)

GetNRow(3)

GetDataAsRows(2)

GetColumn(2)

RecountMajorMinor(2)

GetClassIndex(2)

DeleteRow(2)

GetClassType(1)

Counts(1)

GetClassColumn(1)

GetClassAsReals(1)

GetRows(1)

IsInSingleClass(1)

MajorityClass(1)

PushRow(1)

Clone(1)

Méthodes fréquemment utilisées

GetRow (6)

GetClassAsStrings (5)

GetClassValueSpace (4)

GetNColumn (4)

GetColumns (3)

GetNRow (3)

GetDataAsRows (2)

GetColumn (2)

RecountMajorMinor (2)

GetClassIndex (2)

Méthodes fréquemment utilisées

DeleteRow (2)

GetClassType (1)

Counts (1)

GetClassColumn (1)

GetClassAsReals (1)

GetRows (1)

IsInSingleClass (1)

MajorityClass (1)

PushRow (1)

Clone (1)

Exemple #1

0

Afficher le fichier

Fichier : rf.go Projet : shuLhan/go-mining

// // ClassifySet given a samples predict their class by running each sample in // forest, adn return their class prediction with confusion matrix. // `samples` is the sample that will be predicted, `sampleIds` is the index of // samples. // If `sampleIds` is not nil, then sample index will be checked in each tree, // if the sample is used for training, their vote is not counted. // // Algorithm, // // (0) Get value space (possible class values in dataset) // (1) For each row in test-set, // (1.1) collect votes in all trees, // (1.2) select majority class vote, and // (1.3) compute and save the actual class probabilities. // (2) Compute confusion matrix from predictions. // (3) Compute stat from confusion matrix. // (4) Write the stat to file only if sampleIds is empty, which mean its run // not from OOB set. // func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface, sampleIds []int, ) ( predicts []string, cm *classifier.CM, probs []float64, ) { stat := classifier.Stat{} stat.Start() if len(sampleIds) <= 0 { fmt.Println(tag, "Classify set:", samples) fmt.Println(tag, "Classify set sample (one row):", samples.GetRow(0)) } // (0) vs := samples.GetClassValueSpace() actuals := samples.GetClassAsStrings() sampleIdx := -1 // (1) rows := samples.GetRows() for x, row := range *rows { // (1.1) if len(sampleIds) > 0 { sampleIdx = sampleIds[x] } votes := forest.Votes(row, sampleIdx) // (1.2) classProbs := tekstus.WordsProbabilitiesOf(votes, vs, false) _, idx, ok := numerus.Floats64FindMax(classProbs) if ok { predicts = append(predicts, vs[idx]) } // (1.3) probs = append(probs, classProbs[0]) } // (2) cm = forest.ComputeCM(sampleIds, vs, actuals, predicts) // (3) forest.ComputeStatFromCM(&stat, cm) stat.End() if len(sampleIds) <= 0 { fmt.Println(tag, "CM:", cm) fmt.Println(tag, "Classifying stat:", stat) _ = stat.Write(forest.StatFile) } return predicts, cm, probs }

Exemple #2

0

Afficher le fichier

Fichier : runtime.go Projet : shuLhan/go-mining

// // Performance given an actuals class label and their probabilities, compute // the performance statistic of classifier. // // Algorithm, // (1) Sort the probabilities in descending order. // (2) Sort the actuals and predicts using sorted index from probs // (3) Compute tpr, fpr, precision // (4) Write performance to file. // func (rt *Runtime) Performance(samples tabula.ClasetInterface, predicts []string, probs []float64, ) ( perfs Stats, ) { // (1) actuals := samples.GetClassAsStrings() sortedIds := numerus.IntCreateSeq(0, len(probs)-1) numerus.Floats64InplaceMergesort(probs, sortedIds, 0, len(probs), false) // (2) tekstus.StringsSortByIndex(&actuals, sortedIds) tekstus.StringsSortByIndex(&predicts, sortedIds) // (3) rt.computePerfByProbs(samples, actuals, probs) return rt.perfs }

Exemple #3

0

Afficher le fichier

Fichier : cart.go Projet : shuLhan/go-mining

/* computeGain calculate the gini index for each value in each attribute. */ func (runtime *Runtime) computeGain(D tabula.ClasetInterface) ( gains []gini.Gini, ) { switch runtime.SplitMethod { case SplitMethodGini: // create gains value for all attribute minus target class. gains = make([]gini.Gini, D.GetNColumn()) } runtime.SelectRandomFeature(D) classVS := D.GetClassValueSpace() classIdx := D.GetClassIndex() classType := D.GetClassType() for x, col := range *D.GetColumns() { // skip class attribute. if x == classIdx { continue } // skip column flagged with parent if (col.Flag & ColFlagParent) == ColFlagParent { gains[x].Skip = true continue } // ignore column flagged with skip if (col.Flag & ColFlagSkip) == ColFlagSkip { gains[x].Skip = true continue } // compute gain. if col.GetType() == tabula.TReal { attr := col.ToFloatSlice() if classType == tabula.TString { target := D.GetClassAsStrings() gains[x].ComputeContinu(&attr, &target, &classVS) } else { targetReal := D.GetClassAsReals() classVSReal := tekstus.StringsToFloat64( classVS) gains[x].ComputeContinuFloat(&attr, &targetReal, &classVSReal) } } else { attr := col.ToStringSlice() attrV := col.ValueSpace if DEBUG >= 2 { fmt.Println("[cart] attr :", attr) fmt.Println("[cart] attrV:", attrV) } target := D.GetClassAsStrings() gains[x].ComputeDiscrete(&attr, &attrV, &target, &classVS) } if DEBUG >= 2 { fmt.Println("[cart] gain :", gains[x]) } } return }

Exemple #4

0

Afficher le fichier

Fichier : cart.go Projet : shuLhan/go-mining

/* splitTreeByGain calculate the gain in all dataset, and split into two node: left and right. Return node with the split information. */ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( node *binary.BTNode, e error, ) { node = &binary.BTNode{} D.RecountMajorMinor() // if dataset is empty return node labeled with majority classes in // dataset. nrow := D.GetNRow() if nrow <= 0 { if DEBUG >= 2 { fmt.Printf("[cart] empty dataset (%s) : %v\n", D.MajorityClass(), D) } node.Value = NodeValue{ IsLeaf: true, Class: D.MajorityClass(), Size: 0, } return node, nil } // if all dataset is in the same class, return node as leaf with class // is set to that class. single, name := D.IsInSingleClass() if single { if DEBUG >= 2 { fmt.Printf("[cart] in single class (%s): %v\n", name, D.GetColumns()) } node.Value = NodeValue{ IsLeaf: true, Class: name, Size: nrow, } return node, nil } if DEBUG >= 2 { fmt.Println("[cart] D:", D) } // calculate the Gini gain for each attribute. gains := runtime.computeGain(D) // get attribute with maximum Gini gain. MaxGainIdx := gini.FindMaxGain(&gains) MaxGain := gains[MaxGainIdx] // if maxgain value is 0, use majority class as node and terminate // the process if MaxGain.GetMaxGainValue() == 0 { if DEBUG >= 2 { fmt.Println("[cart] max gain 0 with target", D.GetClassAsStrings(), " and majority class is ", D.MajorityClass()) } node.Value = NodeValue{ IsLeaf: true, Class: D.MajorityClass(), Size: 0, } return node, nil } // using the sorted index in MaxGain, sort all field in dataset tabula.SortColumnsByIndex(D, MaxGain.SortedIndex) if DEBUG >= 2 { fmt.Println("[cart] maxgain:", MaxGain) } // Now that we have attribute with max gain in MaxGainIdx, and their // gain dan partition value in Gains[MaxGainIdx] and // GetMaxPartValue(), we split the dataset based on type of max-gain // attribute. // If its continuous, split the attribute using numeric value. 
// If its discrete, split the attribute using subset (partition) of // nominal values. var splitV interface{} if MaxGain.IsContinu { splitV = MaxGain.GetMaxPartGainValue() } else { attrPartV := MaxGain.GetMaxPartGainValue() attrSubV := attrPartV.(tekstus.ListStrings) splitV = attrSubV[0].Normalize() } if DEBUG >= 2 { fmt.Println("[cart] maxgainindex:", MaxGainIdx) fmt.Println("[cart] split v:", splitV) } node.Value = NodeValue{ SplitAttrName: D.GetColumn(MaxGainIdx).GetName(), IsLeaf: false, IsContinu: MaxGain.IsContinu, Size: nrow, SplitAttrIdx: MaxGainIdx, SplitV: splitV, } dsL, dsR, e := tabula.SplitRowsByValue(D, MaxGainIdx, splitV) if e != nil { return node, e } splitL := dsL.(tabula.ClasetInterface) splitR := dsR.(tabula.ClasetInterface) // Set the flag to parent in attribute referenced by // MaxGainIdx, so it will not computed again in the next round. cols := splitL.GetColumns() for x := range *cols { if x == MaxGainIdx { (*cols)[x].Flag = ColFlagParent } else { (*cols)[x].Flag = 0 } } cols = splitR.GetColumns() for x := range *cols { if x == MaxGainIdx { (*cols)[x].Flag = ColFlagParent } else { (*cols)[x].Flag = 0 } } nodeLeft, e := runtime.splitTreeByGain(splitL) if e != nil { return node, e } nodeRight, e := runtime.splitTreeByGain(splitR) if e != nil { return node, e } node.SetLeft(nodeLeft) node.SetRight(nodeRight) return node, nil }

Exemple #5

0

Afficher le fichier

Fichier : crf.go Projet : shuLhan/go-mining

// // ClassifySetByWeight will classify each instance in samples by weight // with respect to its single performance. // // Algorithm, // (1) For each instance in samples, // (1.1) for each stage, // (1.1.1) collect votes for instance in current stage. // (1.1.2) Compute probabilities of each classes in votes. // // prob_class = count_of_class / total_votes // // (1.1.3) Compute total of probabilites times of stage weight. // // stage_prob = prob_class * stage_weight // // (1.2) Divide each class stage probabilites with // // stage_prob = stage_prob / // (sum_of_all_weights * number_of_tree_in_forest) // // (1.3) Select class label with highest probabilites. // (1.4) Save stage probabilities for positive class. // (2) Compute confusion matrix. // func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface, sampleIds []int, ) ( predicts []string, cm *classifier.CM, probs []float64, ) { stat := classifier.Stat{} stat.Start() vs := samples.GetClassValueSpace() stageProbs := make([]float64, len(vs)) stageSumProbs := make([]float64, len(vs)) sumWeights := numerus.Floats64Sum(crf.weights) // (1) rows := samples.GetDataAsRows() for _, row := range *rows { for y := range stageSumProbs { stageSumProbs[y] = 0 } // (1.1) for y, forest := range crf.forests { // (1.1.1) votes := forest.Votes(row, -1) // (1.1.2) probs := tekstus.WordsProbabilitiesOf(votes, vs, false) // (1.1.3) for z := range probs { stageSumProbs[z] += probs[z] stageProbs[z] += probs[z] * crf.weights[y] } } // (1.2) stageWeight := sumWeights * float64(crf.NTree) for x := range stageProbs { stageProbs[x] = stageProbs[x] / stageWeight } // (1.3) _, maxi, ok := numerus.Floats64FindMax(stageProbs) if ok { predicts = append(predicts, vs[maxi]) } probs = append(probs, stageSumProbs[0]/ float64(len(crf.forests))) } // (2) actuals := samples.GetClassAsStrings() cm = crf.ComputeCM(sampleIds, vs, actuals, predicts) crf.ComputeStatFromCM(&stat, cm) stat.End() _ = stat.Write(crf.StatFile) return predicts, cm, 
probs }