Exemple #1
0
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
// See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
func (KNN *KNNClassifier) PredictOne(vector []float64) string {

	rows := KNN.TrainingData.Rows
	rownumbers := make(map[int]float64)
	labels := make([]string, 0)
	maxmap := make(map[string]int)

	convertedVector := util.FloatsToMatrix(vector)

	// Check what distance function we are using
	switch KNN.DistanceFunc {
	case "euclidean":
		{
			euclidean := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.TrainingData.GetRowVectorWithoutClass(i)
				rowMat := util.FloatsToMatrix(row)
				distance := euclidean.Distance(rowMat, convertedVector)
				rownumbers[i] = distance
			}
		}
	case "manhattan":
		{
			manhattan := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.TrainingData.GetRowVectorWithoutClass(i)
				rowMat := util.FloatsToMatrix(row)
				distance := manhattan.Distance(rowMat, convertedVector)
				rownumbers[i] = distance
			}
		}
	}

	sorted := util.SortIntMap(rownumbers)
	values := sorted[:KNN.NearestNeighbours]

	for _, elem := range values {
		label := KNN.TrainingData.GetClass(elem)
		labels = append(labels, label)

		if _, ok := maxmap[label]; ok {
			maxmap[label] += 1
		} else {
			maxmap[label] = 1
		}
	}

	sortedlabels := util.SortStringMap(maxmap)
	label := sortedlabels[0]

	return label
}
Exemple #2
0
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
// See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
func (KNN *KNNClassifier) Predict(vector []float64, K int) string {

	convertedVector := util.FloatsToMatrix(vector)
	// Get the number of rows
	rows, _ := KNN.Data.Dims()
	rownumbers := make(map[int]float64)
	labels := make([]string, 0)
	maxmap := make(map[string]int)

	// Check what distance function we are using
	switch KNN.DistanceFunc {
	case "euclidean":
		{
			euclidean := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.Data.RowView(i)
				rowMat := util.FloatsToMatrix(row)
				distance := euclidean.Distance(rowMat, convertedVector)
				rownumbers[i] = distance
			}
		}
	case "manhattan":
		{
			manhattan := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.Data.RowView(i)
				rowMat := util.FloatsToMatrix(row)
				distance := manhattan.Distance(rowMat, convertedVector)
				rownumbers[i] = distance
			}
		}
	}

	sorted := util.SortIntMap(rownumbers)
	values := sorted[:K]

	for _, elem := range values {
		// It's when we access this map
		labels = append(labels, KNN.Labels[elem])

		if _, ok := maxmap[KNN.Labels[elem]]; ok {
			maxmap[KNN.Labels[elem]] += 1
		} else {
			maxmap[KNN.Labels[elem]] = 1
		}
	}

	sortedlabels := util.SortStringMap(maxmap)
	label := sortedlabels[0]

	return label
}
Exemple #3
0
func TestDBSCANSynthetic(t *testing.T) {
	Convey("Synthetic DBSCAN test should work...", t, func() {

		inst, err := base.ParseCSVToInstances("synthetic.csv", false)
		So(err, ShouldBeNil)

		p := DBSCANParameters{
			ClusterParameters{
				inst.AllAttributes(),
				pairwise.NewEuclidean(),
			},
			1,
			1,
		}

		m, err := DBSCAN(inst, p)
		So(err, ShouldBeNil)

		So(len(m), ShouldEqual, 2)
		So(m[1], ShouldContain, 0)
		So(m[1], ShouldContain, 1)
		So(m[1], ShouldContain, 2)
		So(m[1], ShouldContain, 3)

	})
}
Exemple #4
0
func TestDBSCANDistanceQuery(t *testing.T) {

	Convey("Should be able to determine which points are in range...", t, func() {

		// Read in the synthetic test data
		inst, err := base.ParseCSVToInstances("synthetic.csv", false)
		So(err, ShouldBeNil)

		// Create a neighbours vector
		neighbours := big.NewInt(0)

		// Compute pairwise distances
		dist, err := computePairwiseDistances(inst, inst.AllAttributes(), pairwise.NewEuclidean())
		So(dist.At(0, 0), ShouldAlmostEqual, 0)
		So(dist.At(0, 1), ShouldAlmostEqual, 1)
		So(dist.At(1, 0), ShouldAlmostEqual, 1)
		So(dist.At(0, 2), ShouldAlmostEqual, math.Sqrt(5))
		So(dist.At(2, 0), ShouldAlmostEqual, math.Sqrt(5))
		So(err, ShouldBeNil)

		// Do the region query
		neighbours = regionQuery(0, neighbours, dist, 1)
		So(neighbours.Bit(0), ShouldEqual, 1)
		So(neighbours.Bit(1), ShouldEqual, 1)
		So(neighbours.Bit(2), ShouldEqual, 0)
		So(neighbours.Bit(3), ShouldEqual, 0)
		So(neighbours.Bit(4), ShouldEqual, 0)

	})

}
Exemple #5
0
func TestDBSCAN(t *testing.T) {

	Convey("Loading some data and labels...", t, func() {

		inst, err := base.ParseCSVToInstances("dbscan.csv", false)
		So(err, ShouldBeNil)

		file, err := os.Open("dbscan_labels.csv")
		defer file.Close()
		So(err, ShouldBeNil)

		clusterMap := ClusterMap(make(map[int][]int))

		scanner := bufio.NewScanner(file)
		line := -1
		for scanner.Scan() {
			line = line + 1
			v, err := strconv.ParseInt(scanner.Text(), 10, 64)
			if err != nil {
				panic(err)
			}
			v = v + 1 // -1 are noise in scikit-learn's DBSCAN
			c := int(v)
			if c == 0 {
				continue
			}
			if _, ok := clusterMap[c]; !ok {
				clusterMap[c] = make([]int, 0)
			}
			clusterMap[c] = append(clusterMap[c], line)
		}

		Convey("Our DBSCAN implementation should match...", func() {
			p := DBSCANParameters{
				ClusterParameters{
					inst.AllAttributes(),
					pairwise.NewEuclidean(),
				},
				0.3,
				10,
			}
			m, err := DBSCAN(inst, p)
			Convey("There should be nothing in the result that's smaller than MinPts", func() {

				for id := range m {
					So(len(m[id]), ShouldBeGreaterThanOrEqualTo, 10)
				}

			})
			So(err, ShouldBeNil)
			eq, err := clusterMap.Equals(m)
			So(err, ShouldBeNil)
			So(eq, ShouldBeTrue)
		})
	})

}
Exemple #6
0
//Returns an average of the K nearest labels/variables, based on a vector input.
func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {

	// Get the number of rows
	rows, _ := KNN.Data.Dims()
	rownumbers := make(map[int]float64)
	labels := make([]float64, 0)

	// Check what distance function we are using
	switch KNN.DistanceFunc {
	case "euclidean":
		{
			euclidean := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.Data.RowView(i)
				rowMat := util.FloatsToMatrix(row)
				distance := euclidean.Distance(rowMat, vector)
				rownumbers[i] = distance
			}
		}
	case "manhattan":
		{
			manhattan := pairwiseMetrics.NewEuclidean()
			for i := 0; i < rows; i++ {
				row := KNN.Data.RowView(i)
				rowMat := util.FloatsToMatrix(row)
				distance := manhattan.Distance(rowMat, vector)
				rownumbers[i] = distance
			}
		}
	}

	sorted := util.SortIntMap(rownumbers)
	values := sorted[:K]

	var sum float64
	for _, elem := range values {
		value := KNN.Values[elem]
		labels = append(labels, value)
		sum += value
	}

	average := sum / float64(K)
	return average
}
Exemple #7
0
func TestDBSCANDistanceMetric(t *testing.T) {

	Convey("Check the distance function is sane...", t, func() {

		d1 := mat64.NewDense(1, 2, nil)
		d2 := mat64.NewDense(1, 2, nil)

		d1.Set(0, 0, 0.494260967249)
		d1.Set(0, 1, 1.45106696541)
		d2.Set(0, 0, -1.42808099324)
		d2.Set(0, 1, -0.83706376669)

		e := pairwise.NewEuclidean()
		So(e.Distance(d1, d2), ShouldAlmostEqual, 2.9882, 0.001)

	})

}
Exemple #8
0
func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
	// Get the number of rows
	rows, _ := KNN.Data.Dims()
	rownumbers := make(map[int]float64)
	labels := make([]float64, 0)

	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}

	for i := 0; i < rows; i++ {
		row := KNN.Data.RowView(i)
		distance := distanceFunc.Distance(utilities.VectorToMatrix(row), vector)
		rownumbers[i] = distance
	}

	sorted := utilities.SortIntMap(rownumbers)
	values := sorted[:K]

	var sum float64
	for _, elem := range values {
		value := KNN.Values[elem]
		labels = append(labels, value)
		sum += value
	}

	average := sum / float64(K)
	return average
}
Exemple #9
0
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {

	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")

	}
	// Check Compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for most the most similar items
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {
		// Read the float values out
		for i, _ := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}

		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Find the closest match in the training data
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {

			// Read the float values out
			for i, _ := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}

			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]

		// Reset maxMap
		for a := range maxmap {
			maxmap[a] = 0
		}

		// Refresh maxMap
		for _, elem := range values {
			label := base.GetClass(KNN.TrainingData, elem)
			if _, ok := maxmap[label]; ok {
				maxmap[label]++
			} else {
				maxmap[label] = 1
			}
		}

		// Sort the maxMap
		var maxClass string
		maxVal := -1
		for a := range maxmap {
			if maxmap[a] > maxVal {
				maxVal = maxmap[a]
				maxClass = a
			}
		}

		base.SetClass(ret, predRowNo, maxClass)
		return true, nil

	})

	return ret
}
Exemple #10
0
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}
	// Check Compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Use optimised version if permitted
	if KNN.AllowOptimisations {
		if KNN.DistanceFunc == "euclidean" {
			if KNN.canUseOptimisations(what) {
				return KNN.optimisedEuclideanPredict(what.(*base.DenseInstances))
			}
		}
	}
	fmt.Println("Optimisations are switched off")

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for most the most similar items
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	_, maxRow := what.Size()
	curRow := 0

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {

		if (curRow%1) == 0 && curRow > 0 {
			fmt.Printf("KNN: %.2f %% done\n", float64(curRow)*100.0/float64(maxRow))
		}
		curRow++

		// Read the float values out
		for i, _ := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}

		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Find the closest match in the training data
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {
			// Read the float values out
			for i, _ := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}

			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]

		maxClass := KNN.vote(maxmap, values)

		base.SetClass(ret, predRowNo, maxClass)
		return true, nil

	})

	return ret
}