Example #1
// Predict is just a wrapper for the PredictOne function.
//
// IMPORTANT: Predict panics if Fit was not called or if the
// document vector and train matrix have a different number of columns.
func (nb *BernoulliNBClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Get the features
	featAttrSpecs := base.ResolveAttributes(what, nb.attrs)

	what.MapOverRows(featAttrSpecs, func(row [][]byte, i int) (bool, error) {
		base.SetClass(ret, i, nb.PredictOne(row))
		return true, nil
	})

	return ret
}
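
A minimal round trip with this classifier might look like the sketch below. It assumes golearn's naive and base packages; the CSV path is hypothetical, and the input is assumed to be already binarised, since Bernoulli naive Bayes models binary features.

trainData, err := base.ParseCSVToInstances("binary_train.csv", true) // hypothetical path
if err != nil {
	panic(err)
}
nb := naive.NewBernoulliNBClassifier()
nb.Fit(trainData) // must run before Predict, or Predict panics
predictions := nb.Predict(trainData)
fmt.Println(predictions)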
Example #2
// Predict returns a classification for each row of the input grid, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {

	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}
	// Check Compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for the distance to each training row
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {
		// Read the float values out
		for i := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}

		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Find the closest match in the training data
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {

			// Read the float values out
			for i := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}

			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]

		// Reset maxMap
		for a := range maxmap {
			maxmap[a] = 0
		}

		// Refresh maxMap
		for _, elem := range values {
			label := base.GetClass(KNN.TrainingData, elem)
			if _, ok := maxmap[label]; ok {
				maxmap[label]++
			} else {
				maxmap[label] = 1
			}
		}

		// Find the class with the most votes
		var maxClass string
		maxVal := -1
		for a := range maxmap {
			if maxmap[a] > maxVal {
				maxVal = maxmap[a]
				maxClass = a
			}
		}

		base.SetClass(ret, predRowNo, maxClass)
		return true, nil

	})

	return ret
}
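
A usage sketch for this Predict. The two-argument constructor below matches the DistanceFunc and NearestNeighbours fields used above, but the exact signature is an assumption (newer golearn versions take an additional search-algorithm argument):

cls := knn.NewKnnClassifier("euclidean", 3) // assumed two-argument signature
cls.Fit(trainData)                          // TrainingData must share Attributes with testData
predictions := cls.Predict(testData)
fmt.Println(predictions)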
Example #3
// Predict gathers predictions from all the classifiers
// and outputs the most common (majority) class
//
// IMPORTANT: in the event of a tie, the first class which
// achieved the tie value is output.
func (b *BaggedModel) Predict(from base.FixedDataGrid) base.FixedDataGrid {
	n := runtime.NumCPU()
	// Channel to receive the results as they come in
	votes := make(chan base.DataGrid, n)
	// Count the votes for each class
	voting := make(map[int]map[string]int)

	// Create a goroutine to collect the votes
	var votingwait sync.WaitGroup
	votingwait.Add(1)
	go func() {
		for { // Collect votes until the channel is closed
			incoming, ok := <-votes
			if ok {
				cSpecs := base.ResolveAttributes(incoming, incoming.AllClassAttributes())
				incoming.MapOverRows(cSpecs, func(row [][]byte, predRow int) (bool, error) {
					// Check if we've seen this row before...
					if _, ok := voting[predRow]; !ok {
						// If we haven't, create a voting entry for it
						voting[predRow] = make(map[string]int)
					}
					voting[predRow][base.GetClass(incoming, predRow)]++
					return true, nil
				})
			} else {
				votingwait.Done()
				break
			}
		}
	}()

	// Create workers to process the predictions
	processpipe := make(chan int, n)
	var processwait sync.WaitGroup
	for i := 0; i < n; i++ {
		processwait.Add(1)
		go func() {
			for {
				if j, ok := <-processpipe; ok {
					c := b.Models[j]
					l := b.generatePredictionInstances(j, from)
					votes <- c.Predict(l)
				} else {
					processwait.Done()
					break
				}
			}
		}()
	}

	// Send all the models to the workers for prediction
	for i := range b.Models {
		processpipe <- i
	}
	close(processpipe) // Finished sending models to be predicted
	processwait.Wait() // Predictors all finished processing
	close(votes)       // Close the vote channel and allow it to drain
	votingwait.Wait()  // All the votes are in

	// Generate the overall consensus
	ret := base.GeneratePredictionVector(from)
	for i := range voting {
		maxClass := ""
		maxCount := 0
		// Find the most popular class
		for c := range voting[i] {
			count := voting[i][c]
			if count > maxCount {
				maxClass = c
				maxCount = count
			}
		}
		base.SetClass(ret, i, maxClass)
	}
	return ret
}
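
A sketch of assembling and running the ensemble; AddModel and the ID3 tree constructor are assumptions about the surrounding golearn API rather than something shown in this listing:

bag := new(ensemble.BaggedModel)
for i := 0; i < 10; i++ {
	// Each model is trained on its own bootstrap sample when Fit runs
	bag.AddModel(trees.NewID3DecisionTree(0.6))
}
bag.Fit(trainData)
predictions := bag.Predict(testData)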
Example #4
// Predict returns a classification for each row of the input grid, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}
	// Check Compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Use the optimised version if permitted
	if KNN.AllowOptimisations && KNN.DistanceFunc == "euclidean" && KNN.canUseOptimisations(what) {
		return KNN.optimisedEuclideanPredict(what.(*base.DenseInstances))
	}
	fmt.Println("Optimisations are switched off")

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for the distance to each training row
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	_, maxRow := what.Size()
	curRow := 0

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {

		// Report progress every 100 rows
		if curRow%100 == 0 && curRow > 0 {
			fmt.Printf("KNN: %.2f %% done\n", float64(curRow)*100.0/float64(maxRow))
		}
		curRow++

		// Read the float values out
		for i := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}

		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Find the closest match in the training data
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {
			// Read the float values out
			for i := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}

			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]

		maxClass := KNN.vote(maxmap, values)

		base.SetClass(ret, predRowNo, maxClass)
		return true, nil

	})

	return ret
}
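
The vote helper is not included in this listing. Judging from the inline tallying in Example #2, it plausibly looks like the sketch below (the name and signature are inferred from the call site, not confirmed):

func (KNN *KNNClassifier) vote(maxmap map[string]int, values []int) string {
	// Reset the tally left over from the previous row
	for k := range maxmap {
		maxmap[k] = 0
	}
	// One vote per neighbour, keyed by its class label
	for _, elem := range values {
		maxmap[base.GetClass(KNN.TrainingData, elem)]++
	}
	// Return the label with the most votes
	maxClass := ""
	maxVal := -1
	for k, v := range maxmap {
		if v > maxVal {
			maxVal = v
			maxClass = k
		}
	}
	return maxClass
}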
Example #5
func (KNN *KNNClassifier) optimisedEuclideanPredict(d *base.DenseInstances) base.FixedDataGrid {

	// Create return vector
	ret := base.GeneratePredictionVector(d)
	// Type-assert training data
	tr := KNN.TrainingData.(*base.DenseInstances)
	// Enumeration of AttributeGroups
	agPos := make(map[string]int)
	agTrain := tr.AllAttributeGroups()
	agPred := d.AllAttributeGroups()
	classAttrs := tr.AllClassAttributes()
	counter := 0
	for ag := range agTrain {
		// Detect whether the AttributeGroup has any classes in it
		attrs := agTrain[ag].Attributes()
		if len(base.AttributeIntersect(classAttrs, attrs)) == 0 {
			agPos[ag] = counter
		}
		counter++
	}
	// Pointers to the start of each AttributeGroup's storage
	rowPointers := make([]*C.double, len(agPred))
	trainPointers := make([]*C.double, len(agPred))
	rowSizes := make([]int, len(agPred))
	for ag := range agPred {
		if ap, ok := agPos[ag]; ok {

			rowPointers[ap] = (*C.double)(unsafe.Pointer(&(agPred[ag].Storage()[0])))
			trainPointers[ap] = (*C.double)(unsafe.Pointer(&(agTrain[ag].Storage()[0])))
			rowSizes[ap] = agPred[ag].RowSizeInBytes() / 8
		}
	}
	_, predRows := d.Size()
	_, trainRows := tr.Size()
	// Create the distance vector
	distanceVec := distanceRecs(make([]_Ctype_struct_dist, trainRows))
	// Additional data structures
	voteVec := make([]int, KNN.NearestNeighbours)
	maxMap := make(map[string]int)

	for row := 0; row < predRows; row++ {
		for i := 0; i < trainRows; i++ {
			distanceVec[i].dist = 0
		}
		for ag := range agPred {
			if ap, ok := agPos[ag]; ok {
				C.euclidean_distance(
					&(distanceVec[0]),
					C.int(trainRows),
					C.int(len(agPred[ag].Attributes())),
					C.int(row),
					trainPointers[ap],
					rowPointers[ap],
				)
			}
		}
		sort.Sort(distanceVec)
		votes := distanceVec[:KNN.NearestNeighbours]
		for i, v := range votes {
			voteVec[i] = int(v.p)
		}
		maxClass := KNN.vote(maxMap, voteVec)
		base.SetClass(ret, row, maxClass)
	}
	return ret
}
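
The C kernel euclidean_distance is not shown here. From its argument list, each call presumably accumulates the squared per-column differences for one AttributeGroup into the distance records; a hypothetical pure-Go stand-in (simplified to a plain float64 slice instead of the dist/p record struct) might read:

// accumulateSquaredDistance adds, for every training row, the squared
// differences against prediction row `row` across `cols` columns.
// train and pred are row-major backing arrays, as in the cgo version.
func accumulateSquaredDistance(dist []float64, trainRows, cols, row int, train, pred []float64) {
	for t := 0; t < trainRows; t++ {
		for c := 0; c < cols; c++ {
			d := train[t*cols+c] - pred[row*cols+c]
			dist[t] += d * d
		}
	}
}

Note this accumulates squared Euclidean distance, which preserves neighbour ordering while avoiding a square root per row.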
Example #6
package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base" // assuming the upstream golearn import path
)

func main() {

	// Instances can be read using ParseCSVToInstances
	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
	if err != nil {
		panic(err)
	}

	// Instances can be printed, and you'll see a human-readable summary
	// if you do so. The first section is a line like
	//     Instances with 150 row(s) and 5 attribute(s)
	//
	// It next prints all the attributes
	//     FloatAttribute(Sepal length)
	//     FloatAttribute(Sepal width)
	//     FloatAttribute(Petal length)
	//     FloatAttribute(Petal width)
	//     CategoricalAttribute([Iris-setosa Iris-versicolor Iris-virginica])
	// The final attribute has an asterisk (*) printed before it,
	// meaning that it is the class variable. It then prints out up to
	// 30 rows which correspond to those attributes.
	// 	5.10 3.50 1.40 0.20 Iris-setosa
	// 	4.90 3.00 1.40 0.20 Iris-setosa
	fmt.Println(rawData)

	// If two decimal places isn't enough, you can update the
	// Precision field on any FloatAttribute
	if attr, ok := rawData.AllAttributes()[0].(*base.FloatAttribute); !ok {
		panic("Invalid cast")
	} else {
		attr.Precision = 4
	}
	// Now the first column has more precision
	fmt.Println(rawData)

	// We can update the set of Instances, although the API
	// for doing so is not very sophisticated.

	// First, have to resolve Attribute Specifications
	as := base.ResolveAttributes(rawData, rawData.AllAttributes())

	// Attribute Specifications describe where a given column lives
	rawData.Set(as[0], 0, as[0].GetAttribute().GetSysValFromString("1.00"))

	// A SetClass method exists as a shortcut
	base.SetClass(rawData, 0, "Iris-unusual")
	fmt.Println(rawData)

	// There is a way of creating new Instances from scratch.
	// Inside an Instance, everything's stored as float64.

	// Let's create some attributes
	attrs := make([]base.Attribute, 2)
	attrs[0] = base.NewFloatAttribute("Arbitrary Float Quantity")
	attrs[1] = new(base.CategoricalAttribute)
	attrs[1].SetName("Class")
	// Register a standard class value ("A") with the attribute
	attrs[1].GetSysValFromString("A")

	// Now let's create the final instances set
	newInst := base.NewDenseInstances()

	// Add the attributes
	newSpecs := make([]base.AttributeSpec, len(attrs))
	for i, a := range attrs {
		newSpecs[i] = newInst.AddAttribute(a)
	}

	// Allocate space
	newInst.Extend(1)

	// Write the data
	newInst.Set(newSpecs[0], 0, newSpecs[0].GetAttribute().GetSysValFromString("1.0"))
	newInst.Set(newSpecs[1], 0, newSpecs[1].GetAttribute().GetSysValFromString("A"))

	fmt.Println(newInst)

}
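
To check the write above, the stored bytes can be read back with Get and unpacked. Reusing base.UnpackBytesToFloat from the KNN examples here is a small assumption about the storage format:

raw := newInst.Get(newSpecs[0], 0)
fmt.Println(base.UnpackBytesToFloat(raw)) // expected to print 1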