Example #1
0
// CombineClusters combines freshly found clusters with existing clusters and
// returns the summaries that were updated or newly created.
//
// The work is done in two passes:
//
//  1. Exact match. A fresh summary whose Hash equals that of an old summary,
//     and whose StepFit.Regression has the same sign bit, is considered the
//     same event. The fresh summary is folded into the old one via
//     old.Merge(fresh) and the old summary is returned. Such a fresh summary
//     takes no part in the second pass.
//
//  2. Overlap match. For every fresh summary without an exact match, each old
//     summary is scored by how many of its leading keys (at most 20, as
//     selected by util.AtMost) also appear in the fresh summary's Keys. The
//     first old summary with the highest non-zero score is the best match.
//     - No old summary shares a key: the fresh summary is a new cluster and
//     is returned as is.
//     - Otherwise the fresh summary supersedes the best match only if it
//     has more keys, or it has the same number of keys, a regression in
//     the same direction and a larger absolute regression. In that case it
//     inherits Status, Message, ID and Bugs from the best match, is
//     returned, and replaces the best match inside oldSummaries.
//     - If the fresh summary does not supersede its best match, nothing
//     changed and neither summary is added to the result.
//
// Note that oldSummaries is modified in place when a fresh summary supersedes
// an old one, and that old summaries are mutated by Merge.
func CombineClusters(freshSummaries, oldSummaries []*types.ClusterSummary) []*types.ClusterSummary {
	ret := []*types.ClusterSummary{}

	stillFresh := []*types.ClusterSummary{}
	// If two cluster summaries have the same hash and same Regression direction
	// then they are the same, merge them together.
	for _, fresh := range freshSummaries {
		merged := false
		for _, old := range oldSummaries {
			if fresh.Hash == old.Hash && math.Signbit(fresh.StepFit.Regression) == math.Signbit(old.StepFit.Regression) {
				old.Merge(fresh)
				ret = append(ret, old)
				merged = true
				break
			}
		}
		// Only summaries that were NOT merged above go on to the overlap pass;
		// otherwise the same event could be returned twice (once as the merged
		// old summary and once as the fresh one carrying the same ID).
		if !merged {
			stillFresh = append(stillFresh, fresh)
		}
	}

	// Even if a summary has a different hash it might still be the same event if
	// there is an overlap in the traces each summary contains.
	for _, fresh := range stillFresh {
		var bestMatch *types.ClusterSummary
		bestMatchHits := 0
		for _, old := range oldSummaries {
			hits := 0
			for _, key := range util.AtMost(old.Keys, 20) {
				if util.In(key, fresh.Keys) {
					hits++
				}
			}
			// Strictly greater: on a tie the earlier old summary is kept.
			if hits > bestMatchHits {
				bestMatchHits = hits
				bestMatch = old
			}
		}

		if bestMatch == nil {
			// No overlap with any existing cluster, so this is a new cluster.
			ret = append(ret, fresh)
			continue
		}

		keysLengthEqual := len(fresh.Keys) == len(bestMatch.Keys)
		regressionInSameDirection := math.Signbit(fresh.StepFit.Regression) == math.Signbit(bestMatch.StepFit.Regression)
		freshHasBetterFit := math.Abs(fresh.StepFit.Regression) > math.Abs(bestMatch.StepFit.Regression)
		freshHasMoreKeys := len(fresh.Keys) > len(bestMatch.Keys)
		if freshHasMoreKeys || (keysLengthEqual && regressionInSameDirection && freshHasBetterFit) {
			// The fresh summary takes over the identity and triage state of the
			// cluster it supersedes.
			fresh.Status = bestMatch.Status
			fresh.Message = bestMatch.Message
			fresh.ID = bestMatch.ID
			fresh.Bugs = bestMatch.Bugs
			ret = append(ret, fresh)
			// Find the bestMatch in oldSummaries and replace it with fresh, so
			// later fresh summaries are compared against the new version.
			for i, oldBest := range oldSummaries {
				if oldBest == bestMatch {
					oldSummaries[i] = fresh
					break
				}
			}
		}
	}
	return ret
}
Example #2
0
// CombineClusters reconciles freshly computed cluster summaries with the
// summaries that already exist and returns the resulting list.
//
// Pass 1 (exact match): a fresh summary that shares its Hash with an old
// summary, and whose StepFit.Regression has the same sign bit, is merged into
// that old summary with old.Merge(fresh). The old summary then takes over the
// fresh summary's Traces and is added to the result.
//
// Pass 2 (overlap match): every fresh summary left over from pass 1 is scored
// against each old summary by counting how many of the old summary's leading
// keys (at most 20, as selected by util.AtMost) are present in the fresh
// summary's Keys. The first old summary with the highest non-zero count is the
// closest match. Hashes may move as backfilling occurs, which is why a key
// overlap is also treated as a sign of the same event.
//   - With no overlap at all, the fresh summary is added to the result as a
//     new cluster.
//   - With an overlap, the fresh summary wins only if its regression points in
//     the same direction and has a larger absolute value. The winner merges in
//     the old summary, inherits its Status, Message, ID and Bugs, is added to
//     the result and replaces the old summary inside oldSummaries.
//   - Otherwise the closest old summary is added to the result unchanged.
//
// oldSummaries is modified in place when a fresh summary replaces an old one.
func CombineClusters(freshSummaries, oldSummaries []*types.ClusterSummary) []*types.ClusterSummary {
	combined := []*types.ClusterSummary{}
	unmatched := []*types.ClusterSummary{}

	// Pass 1: same hash and same regression direction means same cluster.
nextFresh:
	for _, candidate := range freshSummaries {
		for _, existing := range oldSummaries {
			if candidate.Hash != existing.Hash {
				continue
			}
			if math.Signbit(candidate.StepFit.Regression) != math.Signbit(existing.StepFit.Regression) {
				continue
			}
			existing.Merge(candidate)
			glog.Infof("Updating Traces for: %d", existing.ID)
			existing.Traces = candidate.Traces
			combined = append(combined, existing)
			continue nextFresh
		}
		// No existing summary matched by hash; defer to the overlap pass.
		unmatched = append(unmatched, candidate)
	}

	// Pass 2: look for an existing summary that shares traces with the
	// candidate even though the hashes differ.
	for _, candidate := range unmatched {
		var closest *types.ClusterSummary
		mostShared := 0
		for _, existing := range oldSummaries {
			shared := 0
			for _, key := range util.AtMost(existing.Keys, 20) {
				if util.In(key, candidate.Keys) {
					shared++
				}
			}
			// Strict comparison keeps the earliest summary on ties.
			if shared > mostShared {
				mostShared, closest = shared, existing
			}
		}

		if closest == nil {
			// Not a single common trace: a brand new cluster.
			combined = append(combined, candidate)
			continue
		}

		sameDirection := math.Signbit(candidate.StepFit.Regression) == math.Signbit(closest.StepFit.Regression)
		strongerStep := math.Abs(candidate.StepFit.Regression) > math.Abs(closest.StepFit.Regression)
		if !sameDirection || !strongerStep {
			// The existing summary remains the representative of this cluster.
			combined = append(combined, closest)
			continue
		}

		// The candidate supersedes the existing summary and takes over its
		// identity and triage state.
		candidate.Merge(closest)
		candidate.Status = closest.Status
		candidate.Message = closest.Message
		candidate.ID = closest.ID
		candidate.Bugs = closest.Bugs
		combined = append(combined, candidate)

		// Swap the superseded summary out of oldSummaries so that subsequent
		// candidates are compared against its replacement.
		for i := range oldSummaries {
			if oldSummaries[i] == closest {
				oldSummaries[i] = candidate
				break
			}
		}
	}
	return combined
}