func TestRPHashSimpleOnNumImagesData(t *testing.T) { numClusters := 10 lines, err := utils.ReadLines("../demo/data/MNISTnumImages5000.txt") if err != nil { panic(err) } data := utils.StringArrayToFloatArray(lines) start := time.Now() RPHashObject := reader.NewSimpleArray(data, numClusters) simpleObject := simple.NewSimple(RPHashObject) simpleObject.Run() result := RPHashObject.GetCentroids() time := time.Since(start) totalSqDist := float64(0) for _, vector := range data { _, dist := utils.FindNearestDistance(vector, result) totalSqDist += dist * dist } t.Log("Total Square Distance: ", totalSqDist) t.Log("Average Square Distance: ", totalSqDist/float64(len(data))) t.Log("Runtime(seconds): ", time.Seconds()) if len(result) != numClusters { t.Errorf("RPHash Stream did not present the correct number of clusters.") } }
func TestSimpleLeastDistanceVsKmeans(t *testing.T) { //Create fake data var numClusters = 5 var numRows = 400 var dimensionality = 1000 data := make([][]float64, numRows, numRows) for i := 0; i < numRows; i++ { row := make([]float64, dimensionality, dimensionality) for j := 0; j < dimensionality; j++ { row[j] = rand.Float64() } data[i] = row } start := time.Now() //Test RPHash with Fake Object RPHashObject := reader.NewSimpleArray(data, numClusters) simpleObject := simple.NewSimple(RPHashObject) simpleObject.Run() if len(RPHashObject.GetCentroids()) != numClusters { t.Errorf("Requested %v centriods. But RPHashSimple returned %v.", numClusters, len(RPHashObject.GetCentroids())) } rpHashResult := RPHashObject.GetCentroids() fmt.Println("RPHash: ", time.Since(start)) //Find clusters using KMeans start = time.Now() clusterer := clusterer.NewKMeansSimple(numClusters, data) clusterer.Run() kMeansResult := clusterer.GetCentroids() fmt.Println("kMeans: ", time.Since(start)) var kMeansAssignment = 0 var rpHashAssignment = 0 var matchingAssignmentCount = 0 var kMeansTotalDist = float64(0) var rpHashTotalDist = float64(0) for _, vector := range data { rpHashAssignment, _ = utils.FindNearestDistance(vector, rpHashResult) kMeansAssignment, _ = utils.FindNearestDistance(vector, kMeansResult) kMeansTotalDist += utils.Distance(vector, kMeansResult[kMeansAssignment]) rpHashTotalDist += utils.Distance(vector, rpHashResult[rpHashAssignment]) //t.Log(rpHashAssignments[i], kMeansAssignments[i]); if rpHashAssignment == kMeansAssignment { matchingAssignmentCount += 1 } } t.Log("RPHash:", rpHashTotalDist) t.Log("KMeans:", kMeansTotalDist) t.Log("Ratio: ", kMeansTotalDist/rpHashTotalDist) }
func BenchmarkSimple(b *testing.B) { var numClusters = 5 var numRows = 4000 var dimensionality = 1000 data := make([][]float64, numRows, numRows) for i := 0; i < numRows; i++ { row := make([]float64, dimensionality, dimensionality) for j := 0; j < dimensionality; j++ { row[j] = rand.Float64() } data[i] = row } for i := 0; i < b.N; i++ { RPHashObject := reader.NewSimpleArray(data, numClusters) simpleObject := simple.NewSimple(RPHashObject) simpleObject.Run() RPHashObject.GetCentroids() } }