func newCluster(keys []string, regression float64, hash string) *types.ClusterSummary { c := types.NewClusterSummary(len(keys), 0) for i, k := range keys { c.Keys[i] = k } c.StepFit.Regression = regression c.Hash = hash return c }
// GetClusterSummaries returns a summaries for each cluster. func GetClusterSummaries(observations []kmeans.Clusterable, centroids []kmeans.Centroid, commits []*tiling.Commit) *ClusterSummaries { ret := &ClusterSummaries{ Clusters: make([]*types.ClusterSummary, len(centroids)), } allClusters, _ := kmeans.GetClusters(observations, centroids) for i, cluster := range allClusters { // cluster is just an array of the observations for a given cluster. numSampleTraces := len(cluster) if numSampleTraces > config.MAX_SAMPLE_TRACES_PER_CLUSTER { numSampleTraces = config.MAX_SAMPLE_TRACES_PER_CLUSTER } stepFit := getStepFit(cluster[0].(*ctrace.ClusterableTrace).Values) summary := types.NewClusterSummary(len(cluster)-1, numSampleTraces) summary.ParamSummaries = getParamSummaries(cluster) summary.StepFit = stepFit summary.Hash = commits[stepFit.TurningPoint].Hash summary.Timestamp = commits[stepFit.TurningPoint].CommitTime for j, o := range cluster { if j != 0 { summary.Keys[j-1] = o.(*ctrace.ClusterableTrace).Key } } // First, sort the traces so they are order with the traces closest to the // centroid first. sc := []*SortableClusterable{} for j := 0; j < len(cluster); j++ { sc = append(sc, &SortableClusterable{ Cluster: cluster[j], Distance: centroids[i].Distance(cluster[j]), }) } // Sort, but leave the centroid, the 0th element, unmoved. sort.Sort(SortableClusterableSlice(sc[1:])) for j, clusterable := range sc { if j == numSampleTraces { break } summary.Traces[j] = traceToFlot(clusterable.Cluster.(*ctrace.ClusterableTrace)) } ret.Clusters[i] = summary } sort.Sort(SortableClusterSummarySlice(ret.Clusters)) return ret }