Beispiel #1
0
// newCluster builds a ClusterSummary sized for len(keys) keys (the second
// NewClusterSummary argument is 0), fills in the keys, and records the given
// step-fit regression value and hash.
func newCluster(keys []string, regression float64, hash string) *types.ClusterSummary {
	summary := types.NewClusterSummary(len(keys), 0)
	for idx := range keys {
		summary.Keys[idx] = keys[idx]
	}
	summary.StepFit.Regression = regression
	summary.Hash = hash
	return summary
}
// GetClusterSummaries returns a summary for each cluster.
//
// The observations are grouped around the centroids via kmeans.GetClusters.
// For every resulting cluster a ClusterSummary is built that carries:
//   - the step fit computed from the values of the cluster's 0th element
//     (treated as the centroid),
//   - the hash and commit time of the commit at the step fit's turning point,
//   - the parameter summaries of the cluster,
//   - the keys of every element except the 0th,
//   - at most config.MAX_SAMPLE_TRACES_PER_CLUSTER sample traces, the 0th
//     element first and the rest in SortableClusterableSlice order.
//
// The summaries themselves are finally sorted with SortableClusterSummarySlice.
func GetClusterSummaries(observations []kmeans.Clusterable, centroids []kmeans.Centroid, commits []*tiling.Commit) *ClusterSummaries {
	summaries := make([]*types.ClusterSummary, len(centroids))
	// Only the grouping is needed here; the second return value is not used.
	allClusters, _ := kmeans.GetClusters(observations, centroids)

	for i, members := range allClusters {
		// members holds the observations of one cluster; cap how many of
		// them are exported as sample traces.
		sampleCount := len(members)
		if sampleCount > config.MAX_SAMPLE_TRACES_PER_CLUSTER {
			sampleCount = config.MAX_SAMPLE_TRACES_PER_CLUSTER
		}

		head := members[0].(*ctrace.ClusterableTrace)
		fit := getStepFit(head.Values)

		// The 0th element contributes no key, hence len(members)-1 keys.
		summary := types.NewClusterSummary(len(members)-1, sampleCount)
		summary.ParamSummaries = getParamSummaries(members)
		summary.StepFit = fit

		turningCommit := commits[fit.TurningPoint]
		summary.Hash = turningCommit.Hash
		summary.Timestamp = turningCommit.CommitTime

		// Record the key of every element after the 0th.
		for j, member := range members[1:] {
			summary.Keys[j] = member.(*ctrace.ClusterableTrace).Key
		}

		// Pair each element with its distance to this cluster's centroid so
		// the sample traces can be ordered with the closest ones first.
		ranked := make([]*SortableClusterable, 0, len(members))
		for _, member := range members {
			ranked = append(ranked, &SortableClusterable{
				Cluster:  member,
				Distance: centroids[i].Distance(member),
			})
		}
		// The 0th element stays in place; only the remainder is sorted.
		sort.Sort(SortableClusterableSlice(ranked[1:]))

		for j, entry := range ranked[:sampleCount] {
			summary.Traces[j] = traceToFlot(entry.Cluster.(*ctrace.ClusterableTrace))
		}

		summaries[i] = summary
	}

	sort.Sort(SortableClusterSummarySlice(summaries))

	return &ClusterSummaries{
		Clusters: summaries,
	}
}