// TestCentroidsAndTraces exercises CentroidsAndTraces and then feeds its
// output through kmeans.KMeans.
//
// It checks the number of centroids and traces returned, the numeric encoding
// of the params of trace "tr4", that reverse maps a Trace back to its original
// params, the distances reported by a Centroid built from tr4, and the cluster
// sizes and total error of two k-means runs with explicitly chosen starting
// centroids.
func TestCentroidsAndTraces(t *testing.T) {
	centroids, traces, f, reverse := CentroidsAndTraces(paramset, traceparams, 2)
	assert.Equal(t, 2, len(centroids))
	assert.Equal(t, 4, len(traces))

	// findTrace returns the trace with the given ID. It aborts the test when
	// no such trace exists, so that later assertions never run against the
	// wrong trace (or a nil pointer).
	findTrace := func(id string) *Trace {
		for _, clusterable := range traces {
			if tr, ok := clusterable.(*Trace); ok && tr.ID == id {
				return tr
			}
		}
		t.Fatalf("trace %q not found in traces", id)
		return nil
	}

	tr4 := findTrace("tr4")
	assert.Equal(t, 0, tr4.Params[1], "Missing param should map to 0.")
	assert.Equal(t, 3, tr4.Params[0], "The value gpu should sort to last of the three params.")
	assert.Equal(t, traceparams["tr4"], reverse(tr4))

	kmeansCentroid := f([]kmeans.Clusterable{tr4, tr4})
	centroid, ok := kmeansCentroid.(*Centroid)
	if !ok {
		t.Fatalf("centroid has type %T, want *Centroid", kmeansCentroid)
	}
	assert.Equal(t, 0.0, centroid.Distance(tr4))

	tr1 := findTrace("tr1")
	assert.Equal(t, 2.0, centroid.Distance(tr1))

	// Now run the k-means algorithm, which is deterministic for a fixed set of
	// starting centroids, so pick our centroids explicitly so we always get the
	// same answer.
	obs := make([]kmeans.Clusterable, len(traces))
	for i, tr := range traces {
		obs[i] = tr
	}
	cent := []kmeans.Centroid{
		f([]kmeans.Clusterable{tr1}),
		f([]kmeans.Clusterable{tr4}),
	}
	kmCentroids, kmClusters := kmeans.KMeans(obs, cent, 2, 10, f)
	assert.Equal(t, 2, len(kmCentroids))
	// The length checks below guard the index expressions that follow them, so
	// stop on failure instead of panicking with an index out of range.
	if !assert.Equal(t, 2, len(kmClusters)) {
		t.FailNow()
	}
	assert.Equal(t, 3, len(kmClusters[0]))
	if !assert.Equal(t, 1, len(kmClusters[1])) {
		t.FailNow()
	}
	assert.Equal(t, "tr4", kmClusters[1][0].(*Trace).ID, "tr4 should be the singe member of the second cluster.")
	assert.InDelta(t, 2.7748, kmeans.TotalError(obs, kmCentroids), 0.01)

	// Run k-means again but with just one centroid and show the total error gets
	// larger.
	kmCentroids, kmClusters = kmeans.KMeans(obs, cent[:1], 2, 10, f)
	assert.Equal(t, 1, len(kmCentroids))
	if !assert.Equal(t, 1, len(kmClusters)) {
		t.FailNow()
	}
	assert.Equal(t, 4, len(kmClusters[0]))
	assert.InDelta(t, 4.42496, kmeans.TotalError(obs, kmCentroids), 0.01)
}
Example #2
0
// CalculateClusterSummaries runs k-means clustering over the trace shapes.
//
// Every trace in tile.Traces accepted by filter is turned into an observation
// via ctrace.NewFullTrace, using its values up to and including the tile's
// last commit index. Each trace is asserted to be a *types.PerfTrace; any
// other type panics. chooseK picks the starting centroids, which are then
// refined with kmeans.Do until the total error moves by less than
// KMEAN_EPSILON between rounds or MAX_KMEANS_ITERATIONS rounds have run.
//
// The returned ClusterSummaries has K and StdDevThreshhold set to the k and
// stddevThreshhold arguments. An error is returned when no trace passes
// filter.
func CalculateClusterSummaries(tile *tiling.Tile, k int, stddevThreshhold float64, filter Filter) (*ClusterSummaries, error) {
	// Exclusive upper bound of the values kept from each trace.
	end := tile.LastCommitIndex() + 1

	observations := make([]kmeans.Clusterable, 0, len(tile.Traces))
	for key, trace := range tile.Traces {
		perf := trace.(*types.PerfTrace)
		if !filter(key, perf) {
			continue
		}
		full := ctrace.NewFullTrace(string(key), perf.Values[:end], trace.Params(), stddevThreshhold)
		observations = append(observations, full)
	}
	if len(observations) == 0 {
		return nil, fmt.Errorf("Zero traces matched.")
	}

	// Start from K centroids and refine them until the total error settles or
	// the iteration budget is exhausted.
	centroids := chooseK(observations, k)
	prevError := 0.0
	for iter := 0; iter < MAX_KMEANS_ITERATIONS; iter++ {
		centroids = kmeans.Do(observations, centroids, ctrace.CalculateCentroid)
		curError := kmeans.TotalError(observations, centroids)
		glog.Infof("Total Error: %f\n", curError)
		converged := math.Abs(curError-prevError) < KMEAN_EPSILON
		if converged {
			break
		}
		prevError = curError
	}

	summaries := GetClusterSummaries(observations, centroids, tile.Commits)
	summaries.K = k
	summaries.StdDevThreshhold = stddevThreshhold
	return summaries, nil
}