Example #1
0
// TestStreamingKMeansOnNumImagesData streams every vector parsed from the
// MNIST image data file through a KMeansStream, logs the total and average
// squared distance from each vector to its nearest returned centroid together
// with the clustering runtime, and fails if the number of centroids returned
// differs from the number requested.
func TestStreamingKMeansOnNumImagesData(t *testing.T) {
	numClusters := 10
	lines, err := utils.ReadLines("../demo/data/MNISTnumImages5000.txt")
	if err != nil {
		// Fail only this test instead of panicking the whole test binary.
		t.Fatalf("reading MNIST image data: %v", err)
	}
	if len(lines) == 0 {
		// Guard the lines[0] access below.
		t.Fatal("MNIST image data file contains no lines")
	}
	// NOTE(review): assumes len(lines[0]) equals the length of the vectors
	// produced by StringArrayToFloatArray — confirm against utils.
	dimensionality := len(lines[0])
	data := utils.StringArrayToFloatArray(lines)

	// Only stream ingestion and centroid extraction are timed; loading the
	// data and scoring the result are excluded.
	start := time.Now()
	kmeansStream := clusterer.NewKMeansStream(numClusters, 10, dimensionality)
	for _, vector := range data {
		kmeansStream.AddDataPoint(vector)
	}

	result := kmeansStream.GetCentroids()
	// Named elapsed rather than time so the time package is not shadowed.
	elapsed := time.Since(start)

	totalSqDist := float64(0)
	for _, vector := range data {
		_, dist := utils.FindNearestDistance(vector, result)
		totalSqDist += dist * dist
	}

	t.Log("Total Square Distance: ", totalSqDist)
	t.Log("Average Square Distance: ", totalSqDist/float64(len(data)))
	t.Log("Runtime(seconds): ", elapsed.Seconds())

	if len(result) != numClusters {
		t.Errorf("KMeans Stream did not present the correct number of clusters: got %d, want %d", len(result), numClusters)
	}
}
Example #2
0
// TestStreamingKMeansOnRandomData reads vectors one at a time from the data
// file at filePath, feeds them to a KMeansStream, then re-reads the file to
// log the total and average squared distance from each vector to its nearest
// returned centroid. It fails if the number of centroids returned differs
// from numClusters.
//
// The reported runtime accumulates only the time spent constructing the
// stream, adding data points and extracting centroids; file reading is
// excluded.
func TestStreamingKMeansOnRandomData(t *testing.T) {
	filereader := utils.NewDataFileReader(filePath)

	start := time.Now()
	kmeansStream := clusterer.NewKMeansStream(numClusters, 10, dimensionality)
	elapsedtime := time.Since(start)

	// Next returning nil marks the end of the input.
	for vector := filereader.Next(); vector != nil; vector = filereader.Next() {
		// Reuse the outer start instead of declaring a shadowing copy.
		start = time.Now()
		kmeansStream.AddDataPoint(vector)
		elapsedtime += time.Since(start)
	}

	start = time.Now()
	result := kmeansStream.GetCentroids()
	elapsedtime += time.Since(start)

	// Second pass over the file: score every vector against the centroids.
	totalSqDist := float64(0)
	filereader = utils.NewDataFileReader(filePath)
	for vector := filereader.Next(); vector != nil; vector = filereader.Next() {
		_, dist := utils.FindNearestDistance(vector, result)
		totalSqDist += dist * dist
	}

	t.Log("Total Square Distance: ", totalSqDist)
	t.Log("Average Square Distance: ", totalSqDist/numDataPoints)
	t.Log("Runtime(seconds): ", elapsedtime.Seconds())

	if len(result) != numClusters {
		t.Errorf("KMeans Stream did not present the correct number of clusters: got %d, want %d", len(result), numClusters)
	}
}
Example #3
0
// NewKMeansStream returns the types.Clusterer produced by
// clusterer.NewKMeansStream, forwarding k, n and dimensionality unchanged.
//
// NOTE(review): k is presumably the number of clusters and dimensionality the
// length of each input vector; the meaning of n is not visible here — confirm
// against clusterer.NewKMeansStream.
func NewKMeansStream(k, n, dimensionality int) types.Clusterer {
	return clusterer.NewKMeansStream(k, n, dimensionality)
}