func TestStreamingKMeansOnNumImagesData(t *testing.T) { numClusters := 10 lines, err := utils.ReadLines("../demo/data/MNISTnumImages5000.txt") if err != nil { panic(err) } dimensionality := len(lines[0]) data := utils.StringArrayToFloatArray(lines) start := time.Now() kmeansStream := clusterer.NewKMeansStream(numClusters, 10, dimensionality) for _, vector := range data { kmeansStream.AddDataPoint(vector) } result := kmeansStream.GetCentroids() time := time.Since(start) totalSqDist := float64(0) for _, vector := range data { _, dist := utils.FindNearestDistance(vector, result) totalSqDist += dist * dist } t.Log("Total Square Distance: ", totalSqDist) t.Log("Average Square Distance: ", totalSqDist/float64(len(data))) t.Log("Runtime(seconds): ", time.Seconds()) if len(result) != numClusters { t.Errorf("RPHash Stream did not present the correct number of clusters.") } }
func TestStreamingKMeansOnRandomData(t *testing.T) { filereader := utils.NewDataFileReader(filePath) start := time.Now() kmeansStream := clusterer.NewKMeansStream(numClusters, 10, dimensionality) elapsedtime := time.Since(start) for { vector := filereader.Next() if vector == nil { break } start := time.Now() kmeansStream.AddDataPoint(vector) elapsedtime = elapsedtime + time.Since(start) } start = time.Now() result := kmeansStream.GetCentroids() elapsedtime = elapsedtime + time.Since(start) totalSqDist := float64(0) filereader = utils.NewDataFileReader(filePath) for { vector := filereader.Next() if vector == nil { break } _, dist := utils.FindNearestDistance(vector, result) totalSqDist += dist * dist } t.Log("Total Square Distance: ", totalSqDist) t.Log("Average Square Distance: ", totalSqDist/numDataPoints) t.Log("Runtime(seconds): ", elapsedtime.Seconds()) if len(result) != numClusters { t.Errorf("RPHash Stream did not present the correct number of clusters.") } }
// NewKMeansStream forwards its three arguments unchanged to
// clusterer.NewKMeansStream and returns the result as a types.Clusterer.
// NOTE(review): the meaning of k and n is defined by the clusterer package and
// is not visible here — confirm against that package's documentation.
func NewKMeansStream(k int, n int, dimenstionality int) types.Clusterer {
	stream := clusterer.NewKMeansStream(k, n, dimenstionality)
	return stream
}