// Predict issues predictions. Each class-specific classifier is expected // to output a value between 0 (indicating that a given instance is not // a given class) and 1 (indicating that the given instance is definitely // that class). For each instance, the class with the highest value is chosen. // The result is undefined if several underlying models output the same value. func (m *OneVsAllModel) Predict(what base.FixedDataGrid) (base.FixedDataGrid, error) { ret := base.GeneratePredictionVector(what) vecs := make([]base.FixedDataGrid, m.maxClassVal+1) specs := make([]base.AttributeSpec, m.maxClassVal+1) for i := uint64(0); i <= m.maxClassVal; i++ { f := m.filters[i] c := base.NewLazilyFilteredInstances(what, f) p, err := m.classifiers[i].Predict(c) if err != nil { return nil, err } vecs[i] = p specs[i] = base.ResolveAttributes(p, p.AllClassAttributes())[0] } _, rows := ret.Size() spec := base.ResolveAttributes(ret, ret.AllClassAttributes())[0] for i := 0; i < rows; i++ { class := uint64(0) best := 0.0 for j := uint64(0); j <= m.maxClassVal; j++ { val := base.UnpackBytesToFloat(vecs[j].Get(specs[j], i)) if val > best { class = j best = val } } ret.Set(spec, i, base.PackU64ToBytes(class)) } return ret, nil }
func (lr *LogisticRegression) Predict(X base.FixedDataGrid) base.FixedDataGrid { // Only support 1 class Attribute classAttrs := X.AllClassAttributes() if len(classAttrs) != 1 { panic(fmt.Sprintf("%d Wrong number of classes", len(classAttrs))) } // Generate return structure ret := base.GeneratePredictionVector(X) classAttrSpecs := base.ResolveAttributes(ret, classAttrs) // Retrieve numeric non-class Attributes numericAttrs := base.NonClassFloatAttributes(X) numericAttrSpecs := base.ResolveAttributes(X, numericAttrs) // Allocate row storage row := make([]float64, len(numericAttrSpecs)) X.MapOverRows(numericAttrSpecs, func(rowBytes [][]byte, rowNo int) (bool, error) { for i, r := range rowBytes { row[i] = base.UnpackBytesToFloat(r) } val := Predict(lr.model, row) vals := base.PackFloatToBytes(val) ret.Set(classAttrSpecs[0], rowNo, vals) return true, nil }) return ret }
// Predict outputs a base.Instances containing predictions from this tree func (d *DecisionTreeNode) Predict(what base.FixedDataGrid) (base.FixedDataGrid, error) { predictions := base.GeneratePredictionVector(what) classAttr := getClassAttr(predictions) classAttrSpec, err := predictions.GetAttribute(classAttr) if err != nil { panic(err) } predAttrs := base.AttributeDifferenceReferences(what.AllAttributes(), predictions.AllClassAttributes()) predAttrSpecs := base.ResolveAttributes(what, predAttrs) what.MapOverRows(predAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { cur := d for { if cur.Children == nil { predictions.Set(classAttrSpec, rowNo, classAttr.GetSysValFromString(cur.Class)) break } else { splitVal := cur.SplitRule.SplitVal at := cur.SplitRule.SplitAttr ats, err := what.GetAttribute(at) if err != nil { //predictions.Set(classAttrSpec, rowNo, classAttr.GetSysValFromString(cur.Class)) //break panic(err) } var classVar string if _, ok := ats.GetAttribute().(*base.FloatAttribute); ok { // If it's a numeric Attribute (e.g. FloatAttribute) check that // the value of the current node is greater than the old one classVal := base.UnpackBytesToFloat(what.Get(ats, rowNo)) if classVal > splitVal { classVar = "1" } else { classVar = "0" } } else { classVar = ats.GetAttribute().GetStringFromSysVal(what.Get(ats, rowNo)) } if next, ok := cur.Children[classVar]; ok { cur = next } else { // Suspicious of this var bestChild string for c := range cur.Children { bestChild = c if c > classVar { break } } cur = cur.Children[bestChild] } } } return true, nil }) return predictions, nil }
// Predict is just a wrapper for the PredictOne function. // // IMPORTANT: Predict panics if Fit was not called or if the // document vector and train matrix have a different number of columns. func (nb *BernoulliNBClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid { // Generate return vector ret := base.GeneratePredictionVector(what) // Get the features featAttrSpecs := base.ResolveAttributes(what, nb.attrs) what.MapOverRows(featAttrSpecs, func(row [][]byte, i int) (bool, error) { base.SetClass(ret, i, nb.PredictOne(row)) return true, nil }) return ret }
// Predict outputs a base.Instances containing predictions from this tree func (d *DecisionTreeNode) Predict(what base.FixedDataGrid) base.FixedDataGrid { predictions := base.GeneratePredictionVector(what) classAttr := getClassAttr(predictions) classAttrSpec, err := predictions.GetAttribute(classAttr) if err != nil { panic(err) } predAttrs := base.AttributeDifferenceReferences(what.AllAttributes(), predictions.AllClassAttributes()) predAttrSpecs := base.ResolveAttributes(what, predAttrs) what.MapOverRows(predAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { cur := d for { if cur.Children == nil { predictions.Set(classAttrSpec, rowNo, classAttr.GetSysValFromString(cur.Class)) break } else { at := cur.SplitAttr ats, err := what.GetAttribute(at) if err != nil { predictions.Set(classAttrSpec, rowNo, classAttr.GetSysValFromString(cur.Class)) break } classVar := ats.GetAttribute().GetStringFromSysVal(what.Get(ats, rowNo)) if next, ok := cur.Children[classVar]; ok { cur = next } else { var bestChild string for c := range cur.Children { bestChild = c if c > classVar { break } } cur = cur.Children[bestChild] } } } return true, nil }) return predictions }
// param base.IFixedDataGrid // return base.IFixedDataGrid func (p *AveragePerceptron) Predict(what base.FixedDataGrid) base.FixedDataGrid { if !p.trained { panic("Cannot call Predict on an untrained AveragePerceptron") } data := processData(what) allAttrs := base.CheckCompatible(what, p.TrainingData) if allAttrs == nil { // Don't have the same Attributes return nil } // Remove the Attributes which aren't numeric allNumericAttrs := make([]base.Attribute, 0) for _, a := range allAttrs { if fAttr, ok := a.(*base.FloatAttribute); ok { allNumericAttrs = append(allNumericAttrs, fAttr) } } ret := base.GeneratePredictionVector(what) classAttr := ret.AllClassAttributes()[0] classSpec, err := ret.GetAttribute(classAttr) if err != nil { panic(err) } for i, datum := range data { result := p.score(datum) if result > 0.0 { ret.Set(classSpec, i, base.PackU64ToBytes(1)) } else { ret.Set(classSpec, 1, []byte{0, 0, 0, 0, 0, 0, 0, 0}) } } return ret }
func (lr *LinearRegression) Predict(X base.FixedDataGrid) (base.FixedDataGrid, error) { if !lr.fitted { return nil, NoTrainingDataError } ret := base.GeneratePredictionVector(X) attrSpecs := base.ResolveAttributes(X, lr.attrs) clsSpec, err := ret.GetAttribute(lr.cls) if err != nil { return nil, err } X.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) { var prediction float64 = lr.disturbance for j, r := range row { prediction += base.UnpackBytesToFloat(r) * lr.regressionCoefficients[j] } ret.Set(clsSpec, i, base.PackFloatToBytes(prediction)) return true, nil }) return ret, nil }
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm. func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid { // Check what distance function we are using var distanceFunc pairwise.PairwiseDistanceFunc switch KNN.DistanceFunc { case "euclidean": distanceFunc = pairwise.NewEuclidean() case "manhattan": distanceFunc = pairwise.NewManhattan() default: panic("unsupported distance function") } // Check Compatibility allAttrs := base.CheckCompatible(what, KNN.TrainingData) if allAttrs == nil { // Don't have the same Attributes return nil } // Remove the Attributes which aren't numeric allNumericAttrs := make([]base.Attribute, 0) for _, a := range allAttrs { if fAttr, ok := a.(*base.FloatAttribute); ok { allNumericAttrs = append(allNumericAttrs, fAttr) } } // Generate return vector ret := base.GeneratePredictionVector(what) // Resolve Attribute specifications for both whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs) trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs) // Reserve storage for most the most similar items distances := make(map[int]float64) // Reserve storage for voting map maxmap := make(map[string]int) // Reserve storage for row computations trainRowBuf := make([]float64, len(allNumericAttrs)) predRowBuf := make([]float64, len(allNumericAttrs)) // Iterate over all outer rows what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) { // Read the float values out for i, _ := range allNumericAttrs { predRowBuf[i] = base.UnpackBytesToFloat(predRow[i]) } predMat := utilities.FloatsToMatrix(predRowBuf) // Find the closest match in the training data KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) { // Read the float values out for i, _ := range allNumericAttrs { trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i]) } // Compute the distance trainMat := utilities.FloatsToMatrix(trainRowBuf) distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat) return true, nil }) sorted := utilities.SortIntMap(distances) values := sorted[:KNN.NearestNeighbours] // Reset maxMap for a := range maxmap { maxmap[a] = 0 } // Refresh maxMap for _, elem := range values { label := base.GetClass(KNN.TrainingData, elem) if _, ok := maxmap[label]; ok { maxmap[label]++ } else { maxmap[label] = 1 } } // Sort the maxMap var maxClass string maxVal := -1 for a := range maxmap { if maxmap[a] > maxVal { maxVal = maxmap[a] maxClass = a } } base.SetClass(ret, predRowNo, maxClass) return true, nil }) return ret }
// Predict gathers predictions from all the classifiers // and outputs the most common (majority) class // // IMPORTANT: in the event of a tie, the first class which // achieved the tie value is output. func (b *BaggedModel) Predict(from base.FixedDataGrid) base.FixedDataGrid { n := runtime.NumCPU() // Channel to receive the results as they come in votes := make(chan base.DataGrid, n) // Count the votes for each class voting := make(map[int](map[string]int)) // Create a goroutine to collect the votes var votingwait sync.WaitGroup votingwait.Add(1) go func() { for { // Need to resolve the voting problem incoming, ok := <-votes if ok { cSpecs := base.ResolveAttributes(incoming, incoming.AllClassAttributes()) incoming.MapOverRows(cSpecs, func(row [][]byte, predRow int) (bool, error) { // Check if we've seen this class before... if _, ok := voting[predRow]; !ok { // If we haven't, create an entry voting[predRow] = make(map[string]int) // Continue on the current row } voting[predRow][base.GetClass(incoming, predRow)]++ return true, nil }) } else { votingwait.Done() break } } }() // Create workers to process the predictions processpipe := make(chan int, n) var processwait sync.WaitGroup for i := 0; i < n; i++ { processwait.Add(1) go func() { for { if i, ok := <-processpipe; ok { c := b.Models[i] l := b.generatePredictionInstances(i, from) votes <- c.Predict(l) } else { processwait.Done() break } } }() } // Send all the models to the workers for prediction for i := range b.Models { processpipe <- i } close(processpipe) // Finished sending models to be predicted processwait.Wait() // Predictors all finished processing close(votes) // Close the vote channel and allow it to drain votingwait.Wait() // All the votes are in // Generate the overall consensus ret := base.GeneratePredictionVector(from) for i := range voting { maxClass := "" maxCount := 0 // Find the most popular class for c := range voting[i] { votes := voting[i][c] if votes > maxCount { maxClass = c maxCount = votes } } base.SetClass(ret, i, maxClass) } return ret }
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm. func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid { // Check what distance function we are using var distanceFunc pairwise.PairwiseDistanceFunc switch KNN.DistanceFunc { case "euclidean": distanceFunc = pairwise.NewEuclidean() case "manhattan": distanceFunc = pairwise.NewManhattan() default: panic("unsupported distance function") } // Check Compatibility allAttrs := base.CheckCompatible(what, KNN.TrainingData) if allAttrs == nil { // Don't have the same Attributes return nil } // Use optimised version if permitted if KNN.AllowOptimisations { if KNN.DistanceFunc == "euclidean" { if KNN.canUseOptimisations(what) { return KNN.optimisedEuclideanPredict(what.(*base.DenseInstances)) } } } fmt.Println("Optimisations are switched off") // Remove the Attributes which aren't numeric allNumericAttrs := make([]base.Attribute, 0) for _, a := range allAttrs { if fAttr, ok := a.(*base.FloatAttribute); ok { allNumericAttrs = append(allNumericAttrs, fAttr) } } // Generate return vector ret := base.GeneratePredictionVector(what) // Resolve Attribute specifications for both whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs) trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs) // Reserve storage for most the most similar items distances := make(map[int]float64) // Reserve storage for voting map maxmap := make(map[string]int) // Reserve storage for row computations trainRowBuf := make([]float64, len(allNumericAttrs)) predRowBuf := make([]float64, len(allNumericAttrs)) _, maxRow := what.Size() curRow := 0 // Iterate over all outer rows what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) { if (curRow%1) == 0 && curRow > 0 { fmt.Printf("KNN: %.2f %% done\n", float64(curRow)*100.0/float64(maxRow)) } curRow++ // Read the float values out for i, _ := range allNumericAttrs { predRowBuf[i] = base.UnpackBytesToFloat(predRow[i]) } predMat := utilities.FloatsToMatrix(predRowBuf) // Find the closest match in the training data KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) { // Read the float values out for i, _ := range allNumericAttrs { trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i]) } // Compute the distance trainMat := utilities.FloatsToMatrix(trainRowBuf) distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat) return true, nil }) sorted := utilities.SortIntMap(distances) values := sorted[:KNN.NearestNeighbours] maxClass := KNN.vote(maxmap, values) base.SetClass(ret, predRowNo, maxClass) return true, nil }) return ret }
// Predict uses the underlying network to produce predictions for the // class variables of X. // // Can only predict one CategoricalAttribute at a time, or up to n // FloatAttributes. Set or unset ClassAttributes to work around this // limitation. func (m *MultiLayerNet) Predict(X base.FixedDataGrid) base.FixedDataGrid { // Create the return vector ret := base.GeneratePredictionVector(X) // Make sure everything's a FloatAttribute insts := m.convertToFloatInsts(X) // Get the input/output Attributes inputAttrs := base.NonClassAttributes(insts) outputAttrs := ret.AllClassAttributes() // Compute layers layers := 2 + len(m.layers) // Check that we're operating in a singular mode floatMode := 0 categoricalMode := 0 for _, a := range outputAttrs { if _, ok := a.(*base.CategoricalAttribute); ok { categoricalMode++ } else if _, ok := a.(*base.FloatAttribute); ok { floatMode++ } else { panic("Unsupported output Attribute type!") } } if floatMode > 0 && categoricalMode > 0 { panic("Can't predict a mix of float and categorical Attributes") } else if categoricalMode > 1 { panic("Can't predict more than one categorical class Attribute") } // Create the activation vector a := mat64.NewDense(m.network.size, 1, make([]float64, m.network.size)) // Resolve the input AttributeSpecs inputAs := base.ResolveAttributes(insts, inputAttrs) // Resolve the output Attributespecs outputAs := base.ResolveAttributes(ret, outputAttrs) // Map over each input row insts.MapOverRows(inputAs, func(row [][]byte, rc int) (bool, error) { // Clear the activation vector for i := 0; i < m.network.size; i++ { a.Set(i, 0, 0.0) } // Build the activation vector for i, vb := range row { if cIndex, ok := m.attrs[inputAs[i].GetAttribute()]; !ok { panic("Can't resolve the Attribute!") } else { a.Set(cIndex, 0, base.UnpackBytesToFloat(vb)) } } // Robots, activate! m.network.Activate(a, layers) // Decide which class to set if floatMode > 0 { for _, as := range outputAs { cIndex := m.attrs[as.GetAttribute()] ret.Set(as, rc, base.PackFloatToBytes(a.At(cIndex, 0))) } } else { maxIndex := 0 maxVal := 0.0 for i := m.classAttrOffset; i < m.classAttrOffset+m.classAttrCount; i++ { val := a.At(i, 0) if val > maxVal { maxIndex = i maxVal = val } } maxIndex -= m.classAttrOffset ret.Set(outputAs[0], rc, base.PackU64ToBytes(uint64(maxIndex))) } return true, nil }) return ret }
func (KNN *KNNClassifier) optimisedEuclideanPredict(d *base.DenseInstances) base.FixedDataGrid { // Create return vector ret := base.GeneratePredictionVector(d) // Type-assert training data tr := KNN.TrainingData.(*base.DenseInstances) // Enumeration of AttributeGroups agPos := make(map[string]int) agTrain := tr.AllAttributeGroups() agPred := d.AllAttributeGroups() classAttrs := tr.AllClassAttributes() counter := 0 for ag := range agTrain { // Detect whether the AttributeGroup has any classes in it attrs := agTrain[ag].Attributes() //matched := false if len(base.AttributeIntersect(classAttrs, attrs)) == 0 { agPos[ag] = counter } counter++ } // Pointers to the start of each prediction row rowPointers := make([]*C.double, len(agPred)) trainPointers := make([]*C.double, len(agPred)) rowSizes := make([]int, len(agPred)) for ag := range agPred { if ap, ok := agPos[ag]; ok { rowPointers[ap] = (*C.double)(unsafe.Pointer(&(agPred[ag].Storage()[0]))) trainPointers[ap] = (*C.double)(unsafe.Pointer(&(agTrain[ag].Storage()[0]))) rowSizes[ap] = agPred[ag].RowSizeInBytes() / 8 } } _, predRows := d.Size() _, trainRows := tr.Size() // Crete the distance vector distanceVec := distanceRecs(make([]_Ctype_struct_dist, trainRows)) // Additional datastructures voteVec := make([]int, KNN.NearestNeighbours) maxMap := make(map[string]int) for row := 0; row < predRows; row++ { for i := 0; i < trainRows; i++ { distanceVec[i].dist = 0 } for ag := range agPred { if ap, ok := agPos[ag]; ok { C.euclidean_distance( &(distanceVec[0]), C.int(trainRows), C.int(len(agPred[ag].Attributes())), C.int(row), trainPointers[ap], rowPointers[ap], ) } } sort.Sort(distanceVec) votes := distanceVec[:KNN.NearestNeighbours] for i, v := range votes { voteVec[i] = int(v.p) } maxClass := KNN.vote(maxMap, voteVec) base.SetClass(ret, row, maxClass) } return ret }