// Write writes a ClusterSummary to the datastore. // // If the ID is set to -1 then write it as a new entry, otherwise update the // existing entry. func Write(c *types.ClusterSummary) error { // First trim down c.Traces to just the first entry, which is the centroid. c.Traces = c.Traces[:1] b, err := json.Marshal(c) if err != nil { return fmt.Errorf("Failed to encode to JSON: %s", err) } if c.ID == -1 { _, err := db.DB.Exec( "INSERT INTO clusters (ts, hash, regression, cluster, status, message) VALUES (?, ?, ?, ?, ?, ?)", c.Timestamp, c.Hash, c.StepFit.Regression, string(b), c.Status, c.Message) if err != nil { return fmt.Errorf("Failed to write to database: %s", err) } } else { _, err := db.DB.Exec( "UPDATE clusters SET ts=?, hash=?, regression=?, cluster=?, status=?, message=? WHERE id=?", c.Timestamp, c.Hash, c.StepFit.Regression, string(b), c.Status, c.Message, c.ID) if err != nil { return fmt.Errorf("Failed to update database: %s", err) } } calcNewClusters() return nil }
// CombineClusters combines freshly found clusters with existing clusters. // // Algorithm: // Run clustering and pick out the "Interesting" clusters. // Compare all the Interesting clusters to all the existing relevant clusters, // where "relevant" clusters are ones whose Hash/timestamp of the step // exists in the current tile. // Start with an empty "list". // For each cluster: // For each relevant existing cluster: // Take the top 20 keys from the existing cluster and count how many appear // in the cluster. // If there are no matches then this is a new cluster, add it to the "list". // If there are matches, possibly to multiple existing clusters, find the // existing cluster with the most matches. // Take the cluster (old/new) with the most members, or the best fit if // they have the same number of matches. // Return all the updated clusters. func CombineClusters(freshSummaries, oldSummaries []*types.ClusterSummary) []*types.ClusterSummary { ret := []*types.ClusterSummary{} stillFresh := []*types.ClusterSummary{} // If two cluster summaries have the same hash and same Regression direction // then they are the same, merge them together. for _, fresh := range freshSummaries { found := false for _, old := range oldSummaries { if fresh.Hash == old.Hash && math.Signbit(fresh.StepFit.Regression) == math.Signbit(old.StepFit.Regression) { old.Merge(fresh) glog.Infof("Updating Traces for: %d", old.ID) old.Traces = fresh.Traces ret = append(ret, old) found = true break } } if !found { stillFresh = append(stillFresh, fresh) } } // Even if a summary has a different hash it might still be the same event if // there is an overlap in the traces each summary contains. for _, fresh := range stillFresh { var bestMatch *types.ClusterSummary = nil bestMatchHits := 0 for _, old := range oldSummaries { hits := 0 for _, key := range util.AtMost(old.Keys, 20) { if util.In(key, fresh.Keys) { hits += 1 } } if hits > bestMatchHits { bestMatchHits = hits bestMatch = old } } if bestMatch != nil { keysLengthEqual := len(fresh.Keys) == len(bestMatch.Keys) regressionInSameDirection := math.Signbit(fresh.StepFit.Regression) == math.Signbit(bestMatch.StepFit.Regression) freshHasBetterFit := math.Abs(fresh.StepFit.Regression) > math.Abs(bestMatch.StepFit.Regression) freshHasMoreKeys := len(fresh.Keys) > len(bestMatch.Keys) if freshHasMoreKeys || (keysLengthEqual && regressionInSameDirection && freshHasBetterFit) { fresh.Merge(bestMatch) fresh.Status = bestMatch.Status fresh.Message = bestMatch.Message fresh.ID = bestMatch.ID fresh.Bugs = bestMatch.Bugs ret = append(ret, fresh) // Find the bestMatch in oldSummaries and replace it with fresh. for i, oldBest := range oldSummaries { if oldBest == bestMatch { oldSummaries[i] = fresh break } } } else { bestMatch.Traces = fresh.Traces ret = append(ret, bestMatch) } } else { ret = append(ret, fresh) } } return ret }