Beispiel #1
0
// TestRPHashSimpleOnNumImagesData runs the simple RPHash clusterer over the
// vectors read from the MNIST number-image sample file and checks that it
// reports exactly numClusters centroids. It also logs the summed and averaged
// squared nearest-centroid distance and the elapsed wall-clock time.
func TestRPHashSimpleOnNumImagesData(t *testing.T) {
	numClusters := 10
	lines, err := utils.ReadLines("../demo/data/MNISTnumImages5000.txt")
	if err != nil {
		// Fail only this test; a panic would abort every other test in the
		// binary as well.
		t.Fatalf("reading MNIST sample data: %v", err)
	}
	data := utils.StringArrayToFloatArray(lines)

	// Time object construction, the clustering run, and centroid retrieval.
	start := time.Now()
	RPHashObject := reader.NewSimpleArray(data, numClusters)
	simpleObject := simple.NewSimple(RPHashObject)
	simpleObject.Run()

	result := RPHashObject.GetCentroids()
	// Named elapsed (not time) so the time package is not shadowed.
	elapsed := time.Since(start)

	// Accumulate the square of the distance value that FindNearestDistance
	// returns for every input vector.
	totalSqDist := float64(0)
	for _, vector := range data {
		_, dist := utils.FindNearestDistance(vector, result)
		totalSqDist += dist * dist
	}

	t.Log("Total Square Distance: ", totalSqDist)
	t.Log("Average Square Distance: ", totalSqDist/float64(len(data)))
	t.Log("Runtime(seconds): ", elapsed.Seconds())

	if len(result) != numClusters {
		t.Errorf("RPHash Stream did not present the correct number of clusters.")
	}
}
Beispiel #2
0
// TestSimpleLeastDistanceVsKmeans clusters the same randomly generated data
// set with RPHash (simple variant) and with the simple k-means clusterer,
// verifies that RPHash returns the requested number of centroids, and logs
// the total point-to-assigned-centroid distance for both results together
// with their ratio and the number of points given the same centroid index.
func TestSimpleLeastDistanceVsKmeans(t *testing.T) {
	// Create fake data: numRows vectors filled with rand.Float64 values.
	numClusters := 5
	numRows := 400
	dimensionality := 1000
	data := make([][]float64, numRows)
	for i := range data {
		row := make([]float64, dimensionality)
		for j := range row {
			row[j] = rand.Float64()
		}
		data[i] = row
	}

	// Find clusters using RPHash.
	start := time.Now()
	RPHashObject := reader.NewSimpleArray(data, numClusters)
	simpleObject := simple.NewSimple(RPHashObject)
	simpleObject.Run()
	// Fetch the centroids once and reuse them for the check and the scoring.
	rpHashResult := RPHashObject.GetCentroids()
	fmt.Println("RPHash: ", time.Since(start))

	if len(rpHashResult) != numClusters {
		t.Errorf("Requested %v centriods. But RPHashSimple returned %v.", numClusters, len(rpHashResult))
	}

	// Find clusters using KMeans. The local is named kMeans so that it does
	// not shadow the clusterer package.
	start = time.Now()
	kMeans := clusterer.NewKMeansSimple(numClusters, data)
	kMeans.Run()
	kMeansResult := kMeans.GetCentroids()
	fmt.Println("kMeans: ", time.Since(start))

	// Score both results: sum each vector's distance to its assigned centroid
	// and count how often both algorithms pick the same centroid index.
	// NOTE(review): centroid indices from the two algorithms are presumably
	// not aligned, so the match count is informational only.
	matchingAssignmentCount := 0
	var kMeansTotalDist, rpHashTotalDist float64
	for _, vector := range data {
		rpHashAssignment, _ := utils.FindNearestDistance(vector, rpHashResult)
		kMeansAssignment, _ := utils.FindNearestDistance(vector, kMeansResult)
		kMeansTotalDist += utils.Distance(vector, kMeansResult[kMeansAssignment])
		rpHashTotalDist += utils.Distance(vector, rpHashResult[rpHashAssignment])
		if rpHashAssignment == kMeansAssignment {
			matchingAssignmentCount++
		}
	}
	t.Log("RPHash:", rpHashTotalDist)
	t.Log("KMeans:", kMeansTotalDist)
	t.Log("Ratio: ", kMeansTotalDist/rpHashTotalDist)
	t.Log("Matching assignments:", matchingAssignmentCount, "of", len(data))
}
Beispiel #3
0
// BenchmarkSimple measures one full simple-RPHash pass per iteration:
// building the RPHash object over a fixed random data set, running the
// clusterer, and fetching the centroids.
func BenchmarkSimple(b *testing.B) {
	numClusters := 5
	numRows := 4000
	dimensionality := 1000

	// Build the fixture: numRows vectors filled with rand.Float64 values.
	data := make([][]float64, numRows)
	for i := range data {
		row := make([]float64, dimensionality)
		for j := range row {
			row[j] = rand.Float64()
		}
		data[i] = row
	}

	// Keep fixture generation out of the measured time and allocation counts.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		RPHashObject := reader.NewSimpleArray(data, numClusters)
		simpleObject := simple.NewSimple(RPHashObject)
		simpleObject.Run()
		RPHashObject.GetCentroids()
	}
}
// TestSimpleArray exercises the accessors of the object returned by
// reader.NewSimpleArray: it checks the values reported right after
// construction (K, dimensionality, iterator, blurs, variance, decoder,
// projections, hash modulus, centroids, previous top IDs) and, where a setter
// exists, that the getter reflects the value that was set.
func TestSimpleArray(t *testing.T) {
	k := 4
	dimensionality := 100
	numBlurs := 2
	numProjections := 2
	numDataPoints := 8
	origVariance := 1.0
	// Left as the zero value (nil interface) and later used as the decoder to set.
	var testDecoderType types.Decoder
	newNumProjections := 4
	newHashModulus := rand.Int63()

	// Row i of both fixtures is a vector whose every component equals i.
	newVarianceSample := make([][]float64, numDataPoints)
	newCentroidList := make([][]float64, numDataPoints)
	for i := 0; i < numDataPoints; i++ {
		newVarianceSample[i] = make([]float64, dimensionality)
		newCentroidList[i] = make([]float64, dimensionality)
		for j := 0; j < dimensionality; j++ {
			newVarianceSample[i][j] = float64(i)
			newCentroidList[i][j] = float64(i)
		}
	}

	newCentroid := make([]float64, dimensionality)
	for i := range newCentroid {
		newCentroid[i] = float64(i)
	}

	newTopID := make([]int64, dimensionality)
	for i := range newTopID {
		newTopID[i] = int64(i)
	}

	RPHashObject := reader.NewSimpleArray(newCentroidList, k)

	// K.
	assert.Equal(t, k, RPHashObject.GetK(), "Expected K equal to Stream K.")

	// Dimensionality.
	assert.Equal(t, dimensionality, RPHashObject.GetDimensions(), "Expected dimensionality equal to Stream dimensionality.")

	// Iterator.
	assert.NotNil(t, RPHashObject.GetVectorIterator(), "Vector iterator should be initially not be nil.")

	// Blurs.
	assert.Equal(t, numBlurs, RPHashObject.GetNumberOfBlurs(), "Number of blurs should be initially 2.")

	// Variance: first the initial value, then the value derived from a sample.
	assert.Equal(t, origVariance, RPHashObject.GetVariance(), "Variance should initially be equal to the original variance value.")
	RPHashObject.SetVariance(newVarianceSample)
	newVariance := utils.VarianceSample(newVarianceSample, 0.01)
	assert.Equal(t, newVariance, RPHashObject.GetVariance(), "Variance should be equal to the new variance value.")

	// Decoders.
	origDecoderType := RPHashObject.GetDecoderType()
	assert.NotNil(t, origDecoderType)
	assert.Equal(t, reflect.ValueOf(&testDecoderType).Elem().Type(), reflect.ValueOf(&origDecoderType).Elem().Type(), "Decoder should implement the Decoder interface.")
	RPHashObject.SetDecoderType(testDecoderType)
	assert.Equal(t, testDecoderType, RPHashObject.GetDecoderType(), "Decoder should be set to a new Decoder.")

	// Projections.
	assert.Equal(t, numProjections, RPHashObject.GetNumberOfProjections(), "Number of projections should be initially 2.")
	RPHashObject.SetNumberOfProjections(newNumProjections)
	assert.Equal(t, newNumProjections, RPHashObject.GetNumberOfProjections(), "Number of projections should be equal to the new number of projections.")

	// Hash modulus: the expected initial value is math.MaxInt64.
	assert.Equal(t, int64(math.MaxInt64), RPHashObject.GetHashModulus(), "Hash modulus should be equal to the maximum 64 bit integer value.")
	RPHashObject.SetHashModulus(newHashModulus)
	assert.Equal(t, newHashModulus, RPHashObject.GetHashModulus(), "Hash modulus should be equal to the new hash modulus.")

	// Centroids.
	assert.Empty(t, RPHashObject.GetCentroids(), "Centroids should initially be empty.")
	RPHashObject.AddCentroid(newCentroid)
	assert.Equal(t, newCentroid, RPHashObject.GetCentroids()[0], "First centroid should be the new centroid.")
	RPHashObject.SetCentroids(newCentroidList)
	assert.Equal(t, newCentroidList, RPHashObject.GetCentroids(), "Centroids should be equal to the new centroid list.")

	// Top IDs.
	assert.Empty(t, RPHashObject.GetPreviousTopID(), "Previous top ID should initially be empty.")
	RPHashObject.SetPreviousTopID(newTopID)
	assert.Equal(t, newTopID, RPHashObject.GetPreviousTopID(), "Previous top ID should be equal to the new top centroid.")
}
Beispiel #5
0
// NewSimpleArray builds an RPHash object for data with k requested clusters.
// It forwards to reader.NewSimpleArray, which takes the same two arguments in
// the opposite order (data first, then k).
func NewSimpleArray(k int, data [][]float64) types.RPHashObject {
	object := reader.NewSimpleArray(data, k)
	return object
}