Ejemplo n.º 1
0
// TestLSHStream wires a Murmur hash, a spherical decoder and a DB-friendly
// projector into an LSH and runs LSHHashStream once over a fixed vector.
// The result is not inspected; the test only exercises the call path.
func TestLSHStream(t *testing.T) {
	seed := int64(0)
	d, k, l := 64, 6, 4
	inDimensions, outDimensions := 10, 2

	data := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}

	// Local names deliberately differ from the package names they come from.
	murmur := hash.NewMurmur(1<<63 - 1)
	spherical := decoder.NewSpherical(d, k, l)
	dbFriendly := projector.NewDBFriendly(inDimensions, outDimensions, seed)

	hasher := lsh.NewLSH(murmur, spherical, dbFriendly)
	hasher.LSHHashStream(data, 1)

	t.Log("√ LSH Stream test complete")
}
Ejemplo n.º 2
0
// BenchmarkStreamLSH measures LSHHashStream on a fixed 10-element vector.
//
// A fresh LSH is built for every iteration, but construction happens with the
// timer stopped so that only the LSHHashStream call contributes to the
// reported time. One-off setup before the loop is excluded via ResetTimer.
func BenchmarkStreamLSH(b *testing.B) {
	var seed int64 = 0
	var d, k, l int = 64, 6, 4
	data := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}
	var inDimensions, outDimentions int = 10, 2
	hash := hash.NewMurmur(1<<63 - 1)
	decoder := decoder.NewSpherical(d, k, l)
	projector := projector.NewDBFriendly(inDimensions, outDimentions, seed)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Construction is per-iteration setup, not the operation under test.
		b.StopTimer()
		streamLSH := lsh.NewLSH(hash, decoder, projector)
		b.StartTimer()

		streamLSH.LSHHashStream(data, 1)
	}
}
Ejemplo n.º 3
0
// BenchmarkDBFriendlyProjection times building a DB-friendly projector
// (10 -> 2 dimensions) and projecting one random vector with it.
//
// Generating the random input vector happens with the timer stopped; seeding,
// projector construction and the Project call are all inside the timed region.
func BenchmarkDBFriendlyProjection(b *testing.B) {
	const (
		inDimensions  int = 10
		outDimensions int = 2
	)
	for n := 0; n < b.N; n++ {
		// Untimed: build a fresh random input vector.
		b.StopTimer()
		source := rand.NewSource(int64(time.Now().Nanosecond()))
		randomGen := rand.New(source)
		data := make([]float64, inDimensions)
		for j := range data {
			data[j] = randomGen.Float64()
		}
		b.StartTimer()

		// Timed: seed, construct and project.
		seed := int64(time.Now().Nanosecond())
		rp := projector.NewDBFriendly(inDimensions, outDimensions, seed)
		rp.Project(data)
	}
}
Ejemplo n.º 4
0
// Run clusters this.data into this.k groups.
//
// When this.projectionDimension is non-zero, every row is first passed through
// a DB-friendly projector (seeded from the global math/rand source) and the
// iterations operate on the projected rows; otherwise the rows are used as-is.
// Means are seeded from evenly spaced rows and clusters start out as
// consecutive index ranges. UpdateMeans and AssignClusters then alternate until
// AssignClusters reports at most 2 swaps or the iteration budget is exhausted.
// Finally UpdateMeans is called once more with the original, unprojected rows.
//
// A warning is printed only when the budget ran out while assignments were
// still changing.
func (this *KMeans) Run() {
	// Upper bound on iterations so the loop below cannot run forever.
	maxiters := 10000
	// Start above the convergence threshold so the loop body runs at least once.
	swaps := 3

	fulldata := this.data

	var p types.Projector
	if this.projectionDimension != 0 {
		p = projector.NewDBFriendly(len(fulldata[0]), this.projectionDimension, rand.Int63())
	}

	// Working copy of the rows: projected when a projector is configured.
	data := make([][]float64, 0, len(fulldata))
	for _, v := range fulldata {
		if p != nil {
			data = append(data, p.Project(v))
		} else {
			data = append(data, v)
		}
	}

	this.n = len(data)

	// Seed each mean with the first row of its initial index range.
	this.means = make([][]float64, this.k)
	for i := 0; i < this.k; i++ {
		this.means[i] = data[i*(this.n/this.k)]
	}

	// Initialize the cluster lists as evenly sized, consecutive index ranges.
	// Each range holds n/k indices, so the trailing n%k rows are not placed in
	// any initial cluster.
	this.clusters = make([][]int, this.k)
	for i := 0; i < this.k; i++ {
		size := this.n / this.k
		clusterStart := i * size
		cluster := make([]int, size)
		for j := range cluster {
			cluster[j] = clusterStart + j
		}
		this.clusters[i] = cluster
	}

	for swaps > 2 && maxiters > 0 {
		maxiters--
		this.UpdateMeans(data)
		swaps = this.AssignClusters(data)
	}
	// Do not warn when the final permitted iteration happened to converge.
	if maxiters == 0 && swaps > 2 {
		fmt.Println("Warning: Max Iterations Reached")
	}

	// Recompute the means from the original (unprojected) rows.
	this.UpdateMeans(fulldata)
}
Ejemplo n.º 5
0
// TestDBFriendly projects a fixed 10-element vector down to 2 dimensions with
// a DB-friendly projector built from seed 0 and compares the output against
// hard-coded expected values.
func TestDBFriendly(t *testing.T) {
	// There is probably a better way to test this than hard coding.
	data := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}
	expectedResult := []float64{1.224744871391589, 13.472193585307478}
	var inDimensions, outDimentions int = 10, 2
	// Use a fixed seed for testing.
	var seed int64 = 0
	RP := projector.NewDBFriendly(inDimensions, outDimentions, seed)
	result := RP.Project(data)
	// Stop here on a length mismatch: the element-wise loop below indexes both
	// slices by the same index and would go out of range otherwise.
	if len(result) != len(expectedResult) {
		t.Fatal("The result and expected result are not the same length.")
	}
	for i := range result {
		// Exact comparison against the hard-coded values above.
		if result[i] != expectedResult[i] {
			t.Errorf("The result at index %d: %f did not match the expected result: %f", i, result[i], expectedResult[i])
		}
	}
	t.Log("√ DBFriendly Projector test complete")
}
Ejemplo n.º 6
0
// TestLSHSimple checks that LSHHashSimple keeps nearby points nearby.
//
// The fixtures form two groups: the two "cluster one" points are close to each
// other in Euclidean space, and so are the two "cluster two" points, while the
// two groups are far apart. For the pairs compared below, the absolute
// difference between hashes within a group must not exceed the absolute
// difference between hashes taken from different groups.
func TestLSHSimple(t *testing.T) {
	seed := int64(0)
	// Keep the dimension reduction modest because it causes a lot of noise.
	inDimensions, outDimensions, numberOfClusters, numberOfSearches := 10, 5, 3, 1

	dataPoint1Cluster1 := []float64{1.0, 0.0, 2.0, 7.0, 4.0, 0.0, 8.0, 3.0, 2.0, 1.0}
	dataPoint2Cluster1 := []float64{2.0, 3.0, 2.0, 6.0, 5.5, 2.0, 8.0, 3.1, 2.0, 0.0}

	dataPoint1Cluster2 := []float64{100.0, -120.0, 6.0, 18.0, 209.0, 0.0, -2.0, 1036.0, 15.0, 123.0}
	dataPoint2Cluster2 := []float64{99.0, -119.0, 2.0, 18.0, 208.5, 0.0, -3.0, 1048.0, 13.0, 122.0}

	murmur := hash.NewMurmur(1<<63 - 1)
	spherical := decoder.NewSpherical(inDimensions, numberOfClusters, numberOfSearches)
	dbFriendly := projector.NewDBFriendly(inDimensions, outDimensions, seed)
	hasher := lsh.NewLSH(murmur, spherical, dbFriendly)

	result1Cluster1 := hasher.LSHHashSimple(dataPoint1Cluster1)
	result2Cluster1 := hasher.LSHHashSimple(dataPoint2Cluster1)
	result1Cluster2 := hasher.LSHHashSimple(dataPoint1Cluster2)
	result2Cluster2 := hasher.LSHHashSimple(dataPoint2Cluster2)

	// Hash distances: within each group, and from cluster one's first point
	// to each point of cluster two.
	withinCluster1 := math.Abs(float64(result1Cluster1 - result2Cluster1))
	withinCluster2 := math.Abs(float64(result1Cluster2 - result2Cluster2))
	acrossToFirst := math.Abs(float64(result1Cluster1 - result1Cluster2))
	acrossToSecond := math.Abs(float64(result1Cluster1 - result2Cluster2))

	// Assert that the hashes are still locality sensitive with respect to the
	// original Euclidean geometry.
	if withinCluster1 > acrossToFirst {
		t.Errorf("\nThe first datapoint in cluster two is closer to the first data point in cluster one than the second data point in cluster one"+
			"\ndatapoint cluster one datapoint one: %d, \ndatapoint cluster one datapoint two: %d, \ndatapoint cluster two datapoint one: %d",
			result1Cluster1, result2Cluster1, result1Cluster2)
	}
	if withinCluster1 > acrossToSecond {
		t.Errorf("\nThe second datapoint in cluster two is closer to the first data point in cluster one than the second data point in cluster one"+
			"\nCluster one datapoint one: %d, \nCluster one datapoint two: %d, \nCluster two datapoint two: %d",
			result1Cluster1, result2Cluster1, result2Cluster2)
	}
	if withinCluster2 > acrossToFirst {
		t.Errorf("\nThe first datapoint in cluster one is closer to the first data point in cluster two than the second data point in cluster two"+
			"\nCluster one datapoint one: %d, \nCluster two datapoint one: %d, \nCluster two datapoint two: %d",
			result1Cluster1, result1Cluster2, result2Cluster2)
	}

	t.Log("√ LSH Simple test complete")
}
Ejemplo n.º 7
0
// NewProjector builds a projector with projector.NewDBFriendly, forwarding n,
// t and randomseed unchanged, and returns it as a types.Projector.
func NewProjector(n, t int, randomseed int64) types.Projector {
	var p types.Projector = projector.NewDBFriendly(n, t, randomseed)
	return p
}