Example #1
0
// NewSimpleArray builds a SimpleArray over inData that targets k clusters.
//
// The vector dimension is taken from the length of the first vector in
// inData; when inData is empty the default dimension of 2 is kept. The
// spherical decoder is created for half of that dimension (rounded down).
// Each call draws a new random seed from the global math/rand source.
func NewSimpleArray(inData [][]float64, k int) *SimpleArray {
	randomSeed := rand.Int63()
	data := utils.NewIterator(inData)
	numDataPoints := len(inData)

	// As the number of rotations increases, the distance increases,
	// which in turn increases the noise.
	numberOfRotations := 6
	numberOfSearches := 1
	numberOfProjections := 2
	numberOfBlurs := 2

	// Default dimension, used when there is no first vector to measure.
	dimension := 2
	// Guard on the input length as well as the iterator: indexing
	// inData[0] on an empty data set would panic.
	if data != nil && len(inData) > 0 {
		dimension = len(inData[0])
	}

	hashModulus := int64(math.MaxInt64)

	// Set the target dimension much lower. Integer division already rounds
	// down for the non-negative lengths produced above.
	targetDimension := dimension / 2
	decoder := decoder.NewSpherical(targetDimension, numberOfRotations, numberOfSearches)

	// Start with empty (non-nil) result slices.
	centroids := [][]float64{}
	topIDs := []int64{}

	return &SimpleArray{
		numDataPoints:       numDataPoints,
		data:                data,
		dimension:           dimension,
		numberOfProjections: numberOfProjections,
		randomSeed:          randomSeed,
		hashModulus:         hashModulus,
		k:                   k,
		numberOfBlurs:       numberOfBlurs,
		decoder:             decoder,
		centroids:           centroids,
		topIDs:              topIDs,
	}
}
Example #2
0
// TestSpherical hashes pairs of random vectors through the spherical decoder
// followed by a Murmur hash, and fails when more than one percent of the
// pairs end up with equal hashes. It also logs the average distance between
// the generated pairs and the observed collision ratio.
func TestSpherical(t *testing.T) {
	const (
		dimension  = 64
		rotations  = 6
		searches   = 4
		iterations = 10000
	)
	sphere := decoder.NewSpherical(dimension, rotations, searches)

	collisions := 0
	distanceSum := 0.0
	for trial := 0; trial < iterations; trial++ {
		// Fill both vectors with coordinates drawn from [-1, 1).
		first := make([]float64, dimension)
		second := make([]float64, dimension)
		for i := range first {
			first[i] = rand.Float64()*2 - 1
			second[i] = rand.Float64()*2 - 1
		}

		// Accumulate the distance between the two raw vectors.
		distanceSum += utils.Distance(first, second)

		murmur := hash.NewMurmur(1<<63 - 1)

		// Decode the normalized vectors with the spherical decoder.
		decodedFirst := sphere.Hash(utils.Normalize(first))
		decodedSecond := sphere.Hash(utils.Normalize(second))

		// Blur the decoded values into a smaller space and compare them.
		if murmur.Hash(decodedFirst) == murmur.Hash(decodedSecond) {
			collisions++
		}
	}

	if collisions > (iterations / 100) {
		t.Errorf("More than 1 percent of the iterations resulted in collisions. %v collisions in %v iterations.",
			collisions, iterations)
	}
	t.Log("Average Distance: ", distanceSum/float64(iterations))
	t.Log("Percent collisions : ", float64(collisions)/float64(iterations))
	t.Log("√ Spherical Decoder test complete")
}
Example #3
0
// TestLSHStream is a smoke test: it wires a Murmur hash, a spherical decoder
// and a DB-friendly projector into an LSH and calls LSHHashStream once on a
// ten-element vector. The returned value is not inspected.
func TestLSHStream(t *testing.T) {
	var (
		seed                        int64 = 0
		dimension, rotations, tries int   = 64, 6, 4
		inDimensions, outDimensions int   = 10, 2
	)
	vector := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}

	murmur := hash.NewMurmur(1<<63 - 1)
	spherical := decoder.NewSpherical(dimension, rotations, tries)
	dbFriendly := projector.NewDBFriendly(inDimensions, outDimensions, seed)

	streamLSH := lsh.NewLSH(murmur, spherical, dbFriendly)
	streamLSH.LSHHashStream(vector, 1)

	t.Log("√ LSH Stream test complete")
}
Example #4
0
// BenchmarkStreamLSH measures LSHHashStream on a fixed ten-element vector.
//
// A fresh LSH is built for every iteration, but only the LSHHashStream call
// is timed: the timer is stopped during setup and construction and runs
// solely around the call under measurement, mirroring BenchmarkSpherical.
func BenchmarkStreamLSH(b *testing.B) {
	// Keep all setup out of the timed region.
	b.StopTimer()
	var seed int64 = 0
	var d, k, l int = 64, 6, 4
	data := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}
	var inDimensions, outDimensions int = 10, 2
	murmur := hash.NewMurmur(1<<63 - 1)
	spherical := decoder.NewSpherical(d, k, l)
	dbFriendly := projector.NewDBFriendly(inDimensions, outDimensions, seed)
	for i := 0; i < b.N; i++ {
		// Construction is per-iteration but untimed.
		streamLSH := lsh.NewLSH(murmur, spherical, dbFriendly)
		b.StartTimer()
		streamLSH.LSHHashStream(data, 1)
		b.StopTimer()
	}
}
Example #5
0
// BenchmarkSpherical times the spherical decoder hashing two normalized
// random vectors per iteration. Vector generation and the nil check on the
// results run with the timer stopped.
func BenchmarkSpherical(b *testing.B) {
	// Exclude setup from the measurement.
	b.StopTimer()
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	const dimension, rotations, searches = 64, 6, 4
	sphere := decoder.NewSpherical(dimension, rotations, searches)
	first := make([]float64, dimension)
	second := make([]float64, dimension)

	for n := 0; n < b.N; n++ {
		// Refill both vectors with normally distributed coordinates.
		for j := range first {
			first[j] = rng.NormFloat64()
			second[j] = rng.NormFloat64()
		}

		b.StartTimer()
		hashedFirst := sphere.Hash(utils.Normalize(first))
		hashedSecond := sphere.Hash(utils.Normalize(second))
		b.StopTimer()

		if hashedFirst == nil || hashedSecond == nil {
			b.Error("Spherical hashes are null")
		}
	}
}
Example #6
0
// NewStreamObject creates an empty StreamObject for vectors of the given
// dimension, targeting k clusters.
//
// The object starts with an iterator over no data, nil centroids and topIDs,
// a zero random seed, and one projection and one blur. Its spherical decoder
// is built for a fixed target dimension of 24 with two rotations and two
// searches. The hash modulus is the largest int64 value.
func NewStreamObject(dimension, k int) *StreamObject {
	var centroids [][]float64
	var topIDs []int64
	numberOfRotations := 2
	numberOfSearches := 2
	targetDimension := 24
	decoder := decoder.NewSpherical(targetDimension, numberOfRotations, numberOfSearches)
	data := utils.NewIterator([][]float64{})
	return &StreamObject{
		decoder:    decoder,
		data:       data,
		dimension:  dimension,
		randomSeed: int64(0),
		// Largest int64 (all bits set, shifted right once). The previous
		// expression `0 >> 1` evaluated to a modulus of zero.
		hashModulus:         int64(^uint64(0) >> 1),
		numberOfProjections: 1,
		numberOfBlurs:       1,
		k:                   k,
		topIDs:              topIDs,
		centroids:           centroids,
		numDataPoints:       0,
	}
}
Example #7
0
// TestLSHSimple checks that LSHHashSimple keeps nearby points nearby.
//
// The data points are seeded so that the two points of cluster one are near
// each other in Euclidean geometry, and likewise the two points of cluster
// two. The hashes of two points from the same cluster should therefore be
// closer together than the hashes of points taken from different clusters.
func TestLSHSimple(t *testing.T) {
	var seed int64 = 0
	// We want to limit the dimension reduction because it causes a lot of noise.
	var (
		inDimensions     int = 10
		outDimensions    int = 5
		numberOfClusters int = 3
		numberOfSearches int = 1
	)

	dataPoint1Cluster1 := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}
	dataPoint2Cluster1 := []float64{2.0, 3.0, 2.0, 6.0, 5.5, 2.0, 8.0, 3.1, 2.0, 0.0}

	dataPoint1Cluster2 := []float64{100.0, -120.0, 6.0, 18.0, 209.0, 0.0, -2.0, 1036.0, 15.0, 123.0}
	dataPoint2Cluster2 := []float64{99.0, -119.0, 2.0, 18.0, 208.5, 0.0, -3.0, 1048.0, 13.0, 122.0}

	murmur := hash.NewMurmur(1<<63 - 1)
	spherical := decoder.NewSpherical(inDimensions, numberOfClusters, numberOfSearches)
	dbFriendly := projector.NewDBFriendly(inDimensions, outDimensions, seed)
	simpleLSH := lsh.NewLSH(murmur, spherical, dbFriendly)

	result1Cluster1 := simpleLSH.LSHHashSimple(dataPoint1Cluster1)
	result2Cluster1 := simpleLSH.LSHHashSimple(dataPoint2Cluster1)
	result1Cluster2 := simpleLSH.LSHHashSimple(dataPoint1Cluster2)
	result2Cluster2 := simpleLSH.LSHHashSimple(dataPoint2Cluster2)

	// Absolute hash differences within each cluster and across clusters.
	withinClusterOne := math.Abs(float64(result1Cluster1 - result2Cluster1))
	withinClusterTwo := math.Abs(float64(result1Cluster2 - result2Cluster2))
	acrossFirstPoints := math.Abs(float64(result1Cluster1 - result1Cluster2))
	acrossToSecondOfTwo := math.Abs(float64(result1Cluster1 - result2Cluster2))

	// Assert that results are still locally sensitive based on the original Euclidean geometry.
	if withinClusterOne > acrossFirstPoints {
		t.Errorf("\nThe first datapoint in cluster two is closer to the first data point in cluster one than the second data point in cluster one"+
			"\ndatapoint cluster one datapoint one: %d, \ndatapoint cluster one datapoint two: %d, \ndatapoint cluster two datapoint one: %d",
			result1Cluster1, result2Cluster1, result1Cluster2)
	}
	if withinClusterOne > acrossToSecondOfTwo {
		t.Errorf("\nThe second datapoint in cluster two is closer to the first data point in cluster one than the second data point in cluster one"+
			"\nCluster one datapoint one: %d, \nCluster one datapoint two: %d, \nCluster two datapoint two: %d",
			result1Cluster1, result2Cluster1, result2Cluster2)
	}
	if withinClusterTwo > acrossFirstPoints {
		t.Errorf("\nThe first datapoint in cluster one is closer to the first data point in cluster two than the second data point in cluster two"+
			"\nCluster one datapoint one: %d, \nCluster two datapoint one: %d, \nCluster two datapoint two: %d",
			result1Cluster1, result1Cluster2, result2Cluster2)
	}

	t.Log("√ LSH Simple test complete")
}
Example #8
0
// NewDecoder returns a spherical decoder, built from the given dimension,
// number of rotations and number of searches, as a types.Decoder.
func NewDecoder(dimension, rotations, numberOfSearches int) types.Decoder {
	var spherical types.Decoder = decoder.NewSpherical(dimension, rotations, numberOfSearches)
	return spherical
}