// Example 1
func TestRPHashSimpleOnNumImagesData(t *testing.T) {
	numClusters := 10
	lines, err := utils.ReadLines("../demo/data/MNISTnumImages5000.txt")
	if err != nil {
	data := utils.StringArrayToFloatArray(lines)

	start := time.Now()
	RPHashObject := reader.NewSimpleArray(data, numClusters)
	simpleObject := simple.NewSimple(RPHashObject)

	result := RPHashObject.GetCentroids()
	time := time.Since(start)

	totalSqDist := float64(0)
	for _, vector := range data {
		_, dist := utils.FindNearestDistance(vector, result)
		totalSqDist += dist * dist

	t.Log("Total Square Distance: ", totalSqDist)
	t.Log("Average Square Distance: ", totalSqDist/float64(len(data)))
	t.Log("Runtime(seconds): ", time.Seconds())

	if len(result) != numClusters {
		t.Errorf("RPHash Stream did not present the correct number of clusters.")
// Example 2
func TestSimpleLeastDistanceVsKmeans(t *testing.T) {

	//Create fake data
	var numClusters = 5
	var numRows = 400
	var dimensionality = 1000
	data := make([][]float64, numRows, numRows)
	for i := 0; i < numRows; i++ {
		row := make([]float64, dimensionality, dimensionality)
		for j := 0; j < dimensionality; j++ {
			row[j] = rand.Float64()
		data[i] = row

	start := time.Now()
	//Test RPHash with Fake Object
	RPHashObject := reader.NewSimpleArray(data, numClusters)
	simpleObject := simple.NewSimple(RPHashObject)

	if len(RPHashObject.GetCentroids()) != numClusters {
		t.Errorf("Requested %v centriods. But RPHashSimple returned %v.", numClusters, len(RPHashObject.GetCentroids()))
	rpHashResult := RPHashObject.GetCentroids()
	fmt.Println("RPHash: ", time.Since(start))
	//Find clusters using KMeans
	start = time.Now()
	clusterer := clusterer.NewKMeansSimple(numClusters, data)

	kMeansResult := clusterer.GetCentroids()
	fmt.Println("kMeans: ", time.Since(start))

	var kMeansAssignment = 0
	var rpHashAssignment = 0
	var matchingAssignmentCount = 0
	var kMeansTotalDist = float64(0)
	var rpHashTotalDist = float64(0)
	for _, vector := range data {
		rpHashAssignment, _ = utils.FindNearestDistance(vector, rpHashResult)
		kMeansAssignment, _ = utils.FindNearestDistance(vector, kMeansResult)
		kMeansTotalDist += utils.Distance(vector, kMeansResult[kMeansAssignment])
		rpHashTotalDist += utils.Distance(vector, rpHashResult[rpHashAssignment])
		//t.Log(rpHashAssignments[i], kMeansAssignments[i]);
		if rpHashAssignment == kMeansAssignment {
			matchingAssignmentCount += 1
	t.Log("RPHash:", rpHashTotalDist)
	t.Log("KMeans:", kMeansTotalDist)
	t.Log("Ratio: ", kMeansTotalDist/rpHashTotalDist)
// Example 3
func BenchmarkSimple(b *testing.B) {
	var numClusters = 5
	var numRows = 4000
	var dimensionality = 1000
	data := make([][]float64, numRows, numRows)
	for i := 0; i < numRows; i++ {
		row := make([]float64, dimensionality, dimensionality)
		for j := 0; j < dimensionality; j++ {
			row[j] = rand.Float64()
		data[i] = row
	for i := 0; i < b.N; i++ {
		RPHashObject := reader.NewSimpleArray(data, numClusters)
		simpleObject := simple.NewSimple(RPHashObject)
// Example 4
func TestSimpleArray(t *testing.T) {
	var k = 4
	var dimensionality = 100
	var numBlurs = 2
	var numProjections = 2
	var numDataPoints = 8
	var origVariance float64 = 1
	var testDecoderType types.Decoder
	var newNumProjections = 4
	var newHashModulus int64 = rand.Int63()

	newVarianceSample, newCentroidList := make([][]float64, numDataPoints), make([][]float64, numDataPoints)
	for i := 0; i < numDataPoints; i++ {
		newVarianceSample[i], newCentroidList[i] = make([]float64, dimensionality), make([]float64, dimensionality)
		for j := 0; j < dimensionality; j++ {
			newVarianceSample[i][j], newCentroidList[i][j] = float64(i), float64(i)

	newCentroid := make([]float64, dimensionality)
	for i := 0; i < dimensionality; i++ {
		newCentroid[i] = float64(i)

	newTopId := make([]int64, dimensionality)
	for i := 0; i < dimensionality; i++ {
		newTopId[i] = int64(i)

	RPHashObject := reader.NewSimpleArray(newCentroidList, k)

	// K.
	assert.Equal(t, k, RPHashObject.GetK(), "Expected K equal to Stream K.")

	// Dimensionality.
	assert.Equal(t, dimensionality, RPHashObject.GetDimensions(), "Expected dimensionality equal to Stream dimensionality.")

	// Iterator.
	assert.NotNil(t, RPHashObject.GetVectorIterator(), "Vector iterator should be initially not be nil.")

	// Blurs.
	assert.Equal(t, numBlurs, RPHashObject.GetNumberOfBlurs(), "Number of blurs should be initially 2.")

	// Variance.
	assert.Equal(t, origVariance, RPHashObject.GetVariance(), "Variance should be equal to the new variance value.")
	newVariance := utils.VarianceSample(newVarianceSample, 0.01)
	assert.Equal(t, newVariance, RPHashObject.GetVariance(), "Variance should be equal to the new variance value.")

	// Decoders.
	origDecoderType := RPHashObject.GetDecoderType()
	assert.NotNil(t, origDecoderType)
	assert.Equal(t, reflect.ValueOf(&testDecoderType).Elem().Type(), reflect.ValueOf(&origDecoderType).Elem().Type(), "Decoder should implement the Decoder interface.")
	assert.Equal(t, testDecoderType, RPHashObject.GetDecoderType(), "Decoder should be set to a new Decoder.")

	// Projections.
	assert.Equal(t, numProjections, RPHashObject.GetNumberOfProjections(), "Number of projections should be initially 2.")
	assert.Equal(t, newNumProjections, RPHashObject.GetNumberOfProjections(), "Number of projections should be equal to the new number of projections.")

	// Hash modulus.
	assert.Equal(t, int64(math.MaxInt64), RPHashObject.GetHashModulus(), "Hash modulus should be equal to the maximum 32 bit integer value.")
	assert.Equal(t, newHashModulus, RPHashObject.GetHashModulus(), "Hash modulus should be equal to the new hash modulus.")

	// Centroids.
	assert.Empty(t, RPHashObject.GetCentroids(), "Centroids should initially be empty.")
	assert.Equal(t, newCentroid, RPHashObject.GetCentroids()[0], "First centroid should be the new centroid.")
	assert.Equal(t, newCentroidList, RPHashObject.GetCentroids(), "Centroids should be equal to the new centroid list.")

	// Top IDs
	assert.Empty(t, RPHashObject.GetPreviousTopID(), "Previous top ID should initially be empty.")
	assert.Equal(t, newTopId, RPHashObject.GetPreviousTopID(), "Previous top ID should be equal to the new top centroid.")
// Example 5
func NewSimpleArray(k int, data [][]float64) types.RPHashObject {
	return reader.NewSimpleArray(data, k)