// CombineClusters combines freshly found clusters with existing clusters. // // Algorithm: // Run clustering and pick out the "Interesting" clusters. // Compare all the Interesting clusters to all the existing relevant clusters, // where "relevant" clusters are ones whose Hash/timestamp of the step // exists in the current tile. // Start with an empty "list". // For each cluster: // For each relevant existing cluster: // Take the top 20 keys from the existing cluster and count how many appear // in the cluster. // If there are no matches then this is a new cluster, add it to the "list". // If there are matches, possibly to multiple existing clusters, find the // existing cluster with the most matches. // Take the cluster (old/new) with the most members, or the best fit if // they have the same number of matches. // Return all the updated clusters. func CombineClusters(freshSummaries, oldSummaries []*types.ClusterSummary) []*types.ClusterSummary { ret := []*types.ClusterSummary{} stillFresh := []*types.ClusterSummary{} // If two cluster summaries have the same hash and same Regression direction // then they are the same, merge them together. for _, fresh := range freshSummaries { for _, old := range oldSummaries { if fresh.Hash == old.Hash && math.Signbit(fresh.StepFit.Regression) == math.Signbit(old.StepFit.Regression) { old.Merge(fresh) ret = append(ret, old) break } } stillFresh = append(stillFresh, fresh) } // Even if a summary has a different hash it might still be the same event if // there is an overlap in the traces each summary contains. for _, fresh := range stillFresh { var bestMatch *types.ClusterSummary = nil bestMatchHits := 0 for _, old := range oldSummaries { hits := 0 for _, key := range util.AtMost(old.Keys, 20) { if util.In(key, fresh.Keys) { hits += 1 } } if hits > bestMatchHits { bestMatchHits = hits bestMatch = old } } if bestMatch != nil { keysLengthEqual := len(fresh.Keys) == len(bestMatch.Keys) regressionInSameDirection := math.Signbit(fresh.StepFit.Regression) == math.Signbit(bestMatch.StepFit.Regression) freshHasBetterFit := math.Abs(fresh.StepFit.Regression) > math.Abs(bestMatch.StepFit.Regression) freshHasMoreKeys := len(fresh.Keys) > len(bestMatch.Keys) if freshHasMoreKeys || (keysLengthEqual && regressionInSameDirection && freshHasBetterFit) { fresh.Status = bestMatch.Status fresh.Message = bestMatch.Message fresh.ID = bestMatch.ID fresh.Bugs = bestMatch.Bugs ret = append(ret, fresh) // Find the bestMatch in oldSummaries and replace it with fresh. for i, oldBest := range oldSummaries { if oldBest == bestMatch { oldSummaries[i] = fresh break } } } } else { ret = append(ret, fresh) } } return ret }
// CombineClusters combines freshly found clusters with existing clusters. // // Algorithm: // Run clustering and pick out the "Interesting" clusters. // Compare all the Interesting clusters to all the existing relevant clusters, // where "relevant" clusters are ones whose Hash/timestamp of the step // exists in the current tile. // Start with an empty "list". // For each cluster: // For each relevant existing cluster: // Take the top 20 keys from the existing cluster and count how many appear // in the cluster. // If there are no matches then this is a new cluster, add it to the "list". // If there are matches, possibly to multiple existing clusters, find the // existing cluster with the most matches. // Take the cluster (old/new) with the most members, or the best fit if // they have the same number of matches. // Return all the updated clusters. func CombineClusters(freshSummaries, oldSummaries []*types.ClusterSummary) []*types.ClusterSummary { ret := []*types.ClusterSummary{} stillFresh := []*types.ClusterSummary{} // If two cluster summaries have the same hash and same Regression direction // then they are the same, merge them together. for _, fresh := range freshSummaries { found := false for _, old := range oldSummaries { if fresh.Hash == old.Hash && math.Signbit(fresh.StepFit.Regression) == math.Signbit(old.StepFit.Regression) { old.Merge(fresh) glog.Infof("Updating Traces for: %d", old.ID) old.Traces = fresh.Traces ret = append(ret, old) found = true break } } if !found { stillFresh = append(stillFresh, fresh) } } // Now process the remaining fresh clusters. for _, fresh := range stillFresh { // Even if a summary has a different hash it might still be the same event if // there is an overlap in the traces each summary contains. Remember that hashes // may move as backfilling occurs. var bestMatch *types.ClusterSummary = nil bestMatchHits := 0 for _, old := range oldSummaries { hits := 0 for _, key := range util.AtMost(old.Keys, 20) { if util.In(key, fresh.Keys) { hits += 1 } } if hits > bestMatchHits { bestMatchHits = hits bestMatch = old } } if bestMatch != nil { // There is at least one common trace. regressionInSameDirection := math.Signbit(fresh.StepFit.Regression) == math.Signbit(bestMatch.StepFit.Regression) freshHasBetterFit := math.Abs(fresh.StepFit.Regression) > math.Abs(bestMatch.StepFit.Regression) if regressionInSameDirection && freshHasBetterFit { fresh.Merge(bestMatch) fresh.Status = bestMatch.Status fresh.Message = bestMatch.Message fresh.ID = bestMatch.ID fresh.Bugs = bestMatch.Bugs ret = append(ret, fresh) // Find the bestMatch in oldSummaries and replace it with fresh. for i, oldBest := range oldSummaries { if oldBest == bestMatch { oldSummaries[i] = fresh break } } } else { ret = append(ret, bestMatch) } } else { ret = append(ret, fresh) } } return ret }