// CalculateClusterSummaries runs k-means clustering over the trace shapes.
func CalculateClusterSummaries(tile *tiling.Tile, k int, stddevThreshhold float64, filter Filter) (*ClusterSummaries, error) {
	lastCommitIndex := tile.LastCommitIndex()
	observations := make([]kmeans.Clusterable, 0, len(tile.Traces))
	for key, trace := range tile.Traces {
		if filter(key, trace.(*types.PerfTrace)) {
			observations = append(observations, ctrace.NewFullTrace(string(key), trace.(*types.PerfTrace).Values[:lastCommitIndex+1], trace.Params(), stddevThreshhold))
		}
	}
	if len(observations) == 0 {
		return nil, fmt.Errorf("Zero traces matched.")
	}

	// Create K starting centroids.
	centroids := chooseK(observations, k)

	// TODO(jcgregorio) Keep iterating until the total error stops changing.
	lastTotalError := 0.0
	for i := 0; i < MAX_KMEANS_ITERATIONS; i++ {
		centroids = kmeans.Do(observations, centroids, ctrace.CalculateCentroid)
		totalError := kmeans.TotalError(observations, centroids)
		glog.Infof("Total Error: %f\n", totalError)
		if math.Abs(totalError-lastTotalError) < KMEAN_EPSILON {
			break
		}
		lastTotalError = totalError
	}
	clusterSummaries := GetClusterSummaries(observations, centroids, tile.Commits)
	clusterSummaries.K = k
	clusterSummaries.StdDevThreshhold = stddevThreshhold
	return clusterSummaries, nil
}
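
// Example (illustrative sketch, not part of the original source): invoking
// CalculateClusterSummaries with a filter that accepts every trace. This
// assumes Filter is func(string, *types.PerfTrace) bool; the values 50 and
// 0.001 for k and the stddev threshold are placeholders, not recommended
// defaults.
func exampleClusterAll(tile *tiling.Tile) {
	matchAll := func(key string, tr *types.PerfTrace) bool {
		return true
	}
	summaries, err := CalculateClusterSummaries(tile, 50, 0.001, matchAll)
	if err != nil {
		glog.Errorf("Clustering failed: %s", err)
		return
	}
	glog.Infof("Clustered with k=%d", summaries.K)
}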
// forEachTestTraceDo calls fn once for each trace in the tile, passing the
// test name and the trace's digest values up to and including the last commit.
func forEachTestTraceDo(tile *tiling.Tile, fn func(string, []string)) {
	tileLen := tile.LastCommitIndex() + 1
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)
		testName := gTrace.Params()[types.PRIMARY_KEY_FIELD]
		fn(testName, gTrace.Values[:tileLen])
	}
}
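
// Example (illustrative, not from the original source): using
// forEachTestTraceDo with a closure to count non-missing digests per test.
func exampleCountDigests(tile *tiling.Tile) map[string]int {
	counts := map[string]int{}
	forEachTestTraceDo(tile, func(testName string, values []string) {
		for _, digest := range values {
			if digest != types.MISSING_DIGEST {
				counts[testName]++
			}
		}
	})
	return counts
}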
// trimTile trims the Tile down to at most the last config.MAX_CLUSTER_COMMITS
// commits, or fewer if there aren't that many commits in the Tile.
func trimTile(tile *tiling.Tile) (*tiling.Tile, error) {
	end := tile.LastCommitIndex() + 1
	begin := end - config.MAX_CLUSTER_COMMITS
	if begin < 0 {
		begin = 0
	}
	return tile.Trim(begin, end)
}
// searchTile queries across a tile.
func searchTile(q *Query, e *expstorage.Expectations, parsedQuery url.Values, storages *storage.Storage, tile *tiling.Tile, tallies *tally.Tallies, blamer *blame.Blamer, paramset *paramsets.Summary) ([]*Digest, []*tiling.Commit, error) {
	// TODO Use CommitRange to create a trimmed tile.

	traceTally := tallies.ByTrace()
	lastCommitIndex := tile.LastCommitIndex()

	// Loop over the tile and pull out all the digests that match
	// the query, collecting the matching traces as you go. Build
	// up a set of intermediates that can then be used to calculate
	// Digests.

	// map[test:digest]*intermediate
	inter := map[string]*intermediate{}
	for id, tr := range tile.Traces {
		if tiling.Matches(tr, parsedQuery) {
			test := tr.Params()[types.PRIMARY_KEY_FIELD]
			// Get all the digests.
			digests := digestsFromTrace(id, tr, q.Head, lastCommitIndex, traceTally)
			for _, digest := range digests {
				cl := e.Classification(test, digest)
				if q.excludeClassification(cl) {
					continue
				}
				// Fix blamer to make this easier.
				if q.BlameGroupID != "" {
					if cl == types.UNTRIAGED {
						b := blamer.GetBlame(test, digest, tile.Commits)
						if q.BlameGroupID != blameGroupID(b, tile.Commits) {
							continue
						}
					} else {
						continue
					}
				}
				key := fmt.Sprintf("%s:%s", test, digest)
				if i, ok := inter[key]; !ok {
					inter[key] = newIntermediate(test, digest, id, tr, digests)
				} else {
					i.addTrace(id, tr, digests)
				}
			}
		}
	}
	// Now loop over all the intermediates and build a Digest for each one.
	ret := make([]*Digest, 0, len(inter))
	for key, i := range inter {
		parts := strings.Split(key, ":")
		ret = append(ret, digestFromIntermediate(parts[0], parts[1], i, e, tile, tallies, blamer, storages.DiffStore, paramset, q.IncludeIgnores))
	}
	return ret, tile.Commits, nil
}
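
// Illustrative sketch (not part of the original code): the call shape for
// searchTile. The expectations are fetched from the store, as elsewhere in
// this package; the remaining arguments are assumed to be set up by the
// caller.
func exampleSearch(q *Query, parsedQuery url.Values, storages *storage.Storage, tile *tiling.Tile, tallies *tally.Tallies, blamer *blame.Blamer, paramset *paramsets.Summary) error {
	exp, err := storages.ExpectationsStore.Get()
	if err != nil {
		return err
	}
	digests, commits, err := searchTile(q, exp, parsedQuery, storages, tile, tallies, blamer, paramset)
	if err != nil {
		return err
	}
	glog.Infof("Matched %d digests across %d commits.", len(digests), len(commits))
	return nil
}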
// processTile records the first and last time each test/digest pair was seen
// in the given tile and writes the results to the digest store.
func (h *historian) processTile(tile *tiling.Tile) error {
	dStore := h.storages.DigestStore
	tileLen := tile.LastCommitIndex() + 1

	var digestInfo *digeststore.DigestInfo
	var ok bool
	counter := 0
	minMaxTimes := map[string]map[string]*digeststore.DigestInfo{}
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)
		testName := trace.Params()[types.PRIMARY_KEY_FIELD]
		for idx, digest := range gTrace.Values[:tileLen] {
			if digest != types.MISSING_DIGEST {
				timeStamp := tile.Commits[idx].CommitTime
				if digestInfo, ok = minMaxTimes[testName][digest]; !ok {
					digestInfo = &digeststore.DigestInfo{
						TestName: testName,
						Digest:   digest,
						First:    timeStamp,
						Last:     timeStamp,
					}
					if testVal, ok := minMaxTimes[testName]; !ok {
						minMaxTimes[testName] = map[string]*digeststore.DigestInfo{digest: digestInfo}
					} else {
						testVal[digest] = digestInfo
					}
					counter++
				} else {
					digestInfo.First = util.MinInt64(digestInfo.First, timeStamp)
					digestInfo.Last = util.MaxInt64(digestInfo.Last, timeStamp)
				}
			}
		}
	}

	digestInfos := make([]*digeststore.DigestInfo, 0, counter)
	for _, digests := range minMaxTimes {
		for _, digestInfo := range digests {
			digestInfos = append(digestInfos, digestInfo)
		}
	}
	return dStore.Update(digestInfos)
}
// TileWithTryData will add all the trybot data for the given issue to the
// given Tile. A new Tile that is a copy of the original Tile will be returned,
// so we aren't modifying the underlying Tile.
func TileWithTryData(tile *tiling.Tile, issue string) (*tiling.Tile, error) {
	ret := tile.Copy()
	lastCommitIndex := tile.LastCommitIndex()
	// The way we handle Tiles, there is always empty space at the end of the
	// Tile past the last commit index. Use that space to inject the trybot
	// results.
	ret.Commits[lastCommitIndex+1].CommitTime = time.Now().Unix()
	lastCommitIndex = ret.LastCommitIndex()

	tryResults, err := Get(issue)
	if err != nil {
		return nil, fmt.Errorf("TileWithTryData: Failed to retrieve trybot results: %s", err)
	}
	// Copy in the trybot data.
	for k, v := range tryResults.Values {
		if tr, ok := ret.Traces[k]; ok {
			tr.(*types.PerfTrace).Values[lastCommitIndex] = v
		}
	}
	return ret, nil
}
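
// Illustrative sketch (not from the original source): producing a Tile that
// includes trybot results for an issue before running further analysis on it.
// The issue ID shown is a placeholder.
func exampleTileWithTrybots(tile *tiling.Tile) (*tiling.Tile, error) {
	tryTile, err := TileWithTryData(tile, "123456")
	if err != nil {
		return nil, fmt.Errorf("Failed to add trybot data: %s", err)
	}
	return tryTile, nil
}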
// updateBlame reads from the provided tile and updates the current
// blame lists.
func (b *Blamer) updateBlame(tile *tiling.Tile) error {
	exp, err := b.storages.ExpectationsStore.Get()
	if err != nil {
		return err
	}

	defer timer.New("blame").Stop()

	// Note: blameStart and blameEnd are continuously updated to contain the
	// smallest start and end index of the ranges for a testName/digest pair.
	blameStart := map[string]map[string]int{}
	blameEnd := map[string]map[string]int{}

	// blameRange stores the candidate ranges for a testName/digest pair.
	blameRange := map[string]map[string][][]int{}
	firstCommit := tile.Commits[0]
	tileLen := tile.LastCommitIndex() + 1
	ret := map[string]map[string]*BlameDistribution{}

	for _, trace := range tile.Traces {
		gtr := trace.(*types.GoldenTrace)
		testName := gtr.Params()[types.PRIMARY_KEY_FIELD]

		// lastIdx tracks the index of the last digest that is definitely
		// not in the blamelist.
		lastIdx := -1
		found := map[string]bool{}
		for idx, digest := range gtr.Values[:tileLen] {
			if digest == types.MISSING_DIGEST {
				continue
			}

			status := exp.Classification(testName, digest)
			if (status == types.UNTRIAGED) && !found[digest] {
				found[digest] = true

				var startIdx int
				endIdx := idx

				// If we have only seen empty digests, then we do not
				// consider any digest before the current one.
				if lastIdx == -1 {
					startIdx = idx
				} else {
					startIdx = lastIdx + 1
				}

				// Get the info about this digest.
				digestInfo, err := b.storages.GetOrUpdateDigestInfo(testName, digest, tile.Commits[idx])
				if err != nil {
					return err
				}

				// Check if the digest was first seen outside the current tile.
				isOld := digestInfo.First < firstCommit.CommitTime
				commitRange := []int{startIdx, endIdx}
				if blameStartFound, ok := blameStart[testName]; !ok {
					blameStart[testName] = map[string]int{digest: startIdx}
					blameEnd[testName] = map[string]int{digest: endIdx}
					blameRange[testName] = map[string][][]int{digest: [][]int{commitRange}}
					ret[testName] = map[string]*BlameDistribution{digest: &BlameDistribution{Old: isOld}}
				} else if currentStart, ok := blameStartFound[digest]; !ok {
					blameStart[testName][digest] = startIdx
					blameEnd[testName][digest] = endIdx
					blameRange[testName][digest] = [][]int{commitRange}
					ret[testName][digest] = &BlameDistribution{Old: isOld}
				} else {
					blameStart[testName][digest] = util.MinInt(currentStart, startIdx)
					blameEnd[testName][digest] = util.MinInt(blameEnd[testName][digest], endIdx)
					blameRange[testName][digest] = append(blameRange[testName][digest], commitRange)
					ret[testName][digest].Old = isOld || ret[testName][digest].Old
				}
			}
			lastIdx = idx
		}
	}

	commits := tile.Commits[:tileLen]
	for testName, digests := range blameRange {
		for digest, commitRanges := range digests {
			start := blameStart[testName][digest]
			end := blameEnd[testName][digest]
			freq := make([]int, len(commits)-start)
			for _, commitRange := range commitRanges {
				// If the commit range is nil, we cannot calculate a
				// blamelist.
				if commitRange == nil {
					freq = []int{}
					break
				}

				// Calculate the blame.
				idxEnd := util.MinInt(commitRange[1], end)
				for i := commitRange[0]; i <= idxEnd; i++ {
					freq[i-start]++
				}
			}
			ret[testName][digest].Freq = freq
		}
	}

	// Swap out the old blame lists for the new ones.
	b.mutex.Lock()
	b.testBlameLists, b.commits = ret, commits
	b.mutex.Unlock()
	return nil
}
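
// Illustrative sketch (not part of the original code) of how the blame
// frequency list above is built: each candidate commit range [startIdx, endIdx]
// votes for the commits it covers, offset so that index 0 corresponds to the
// earliest possible blame commit for that test/digest pair.
func blameFrequenciesSketch(commitRanges [][]int, start, end, numCommits int) []int {
	freq := make([]int, numCommits-start)
	for _, commitRange := range commitRanges {
		idxEnd := commitRange[1]
		if end < idxEnd {
			idxEnd = end
		}
		for i := commitRange[0]; i <= idxEnd; i++ {
			freq[i-start]++
		}
	}
	return freq
}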
// buildTraces returns the Traces for the given intermediate.
func buildTraces(test, digest string, inter *intermediate, e *expstorage.Expectations, tile *tiling.Tile, traceTally map[string]tally.Tally) *Traces {
	traceNames := make([]string, 0, len(inter.Traces))
	for id := range inter.Traces {
		traceNames = append(traceNames, id)
	}

	ret := &Traces{
		TileSize: len(tile.Commits),
		Traces:   []Trace{},
		Digests:  []DigestStatus{},
	}

	sort.Strings(traceNames)

	last := tile.LastCommitIndex()
	y := 0
	if len(traceNames) > 0 {
		ret.Digests = append(ret.Digests, DigestStatus{
			Digest: digest,
			Status: e.Classification(test, digest).String(),
		})
	}
	for _, id := range traceNames {
		t, ok := traceTally[id]
		if !ok {
			continue
		}
		if count, ok := t[digest]; !ok || count == 0 {
			continue
		}
		trace := inter.Traces[id].(*types.GoldenTrace)
		p := Trace{
			Data:   []Point{},
			ID:     id,
			Params: trace.Params(),
		}
		for i := last; i >= 0; i-- {
			if trace.IsMissing(i) {
				continue
			}
			// s is the status of the digest, it is either 0 for a match,
			// or [1-8] if not.
			s := 0
			if trace.Values[i] != digest {
				if index := digestIndex(trace.Values[i], ret.Digests); index != -1 {
					s = index
				} else {
					if len(ret.Digests) < 9 {
						d := trace.Values[i]
						ret.Digests = append(ret.Digests, DigestStatus{
							Digest: d,
							Status: e.Classification(test, d).String(),
						})
						s = len(ret.Digests) - 1
					} else {
						s = 8
					}
				}
			}
			p.Data = append(p.Data, Point{
				X: i,
				Y: y,
				S: s,
			})
		}
		sort.Sort(PointSlice(p.Data))
		ret.Traces = append(ret.Traces, p)
		y += 1
	}
	return ret
}
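
// Illustrative sketch (not from the original source) of the digestIndex helper
// referenced above: it returns the position of a digest within the accumulated
// DigestStatus slice, or -1 if it has not been seen yet. The real
// implementation may differ.
func digestIndexSketch(digest string, digests []DigestStatus) int {
	for i, d := range digests {
		if d.Digest == digest {
			return i
		}
	}
	return -1
}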
// buildTraceData returns a populated []*Trace for all the traces that contain 'digest'.
func buildTraceData(digest string, traceNames []string, tile *tiling.Tile, traceTally map[string]tally.Tally, exp *expstorage.Expectations) ([]*Trace, []*DigestStatus) {
	sort.Strings(traceNames)
	ret := []*Trace{}
	last := tile.LastCommitIndex()
	y := 0

	// Keep track of the first 7 non-matching digests we encounter so we can color them differently.
	otherDigests := []*DigestStatus{}
	// Populate otherDigests with all the digests, including the one we are comparing against.
	if len(traceNames) > 0 {
		// Find the test name so we can look up the triage status.
		trace := tile.Traces[traceNames[0]].(*types.GoldenTrace)
		test := trace.Params()[types.PRIMARY_KEY_FIELD]
		otherDigests = append(otherDigests, &DigestStatus{
			Digest: digest,
			Status: exp.Classification(test, digest).String(),
		})
	}
	for _, id := range traceNames {
		t, ok := traceTally[id]
		if !ok {
			continue
		}
		if count, ok := t[digest]; !ok || count == 0 {
			continue
		}
		trace := tile.Traces[id].(*types.GoldenTrace)
		p := &Trace{
			Data:   []Point{},
			Label:  id,
			Params: trace.Params(),
		}
		for i := last; i >= 0; i-- {
			if trace.IsMissing(i) {
				continue
			}
			// s is the status of the digest, it is either 0 for a match,
			// or [1-8] if not.
			s := 0
			if trace.Values[i] != digest {
				if index := digestIndex(trace.Values[i], otherDigests); index != -1 {
					s = index
				} else {
					if len(otherDigests) < 9 {
						d := trace.Values[i]
						test := trace.Params()[types.PRIMARY_KEY_FIELD]
						otherDigests = append(otherDigests, &DigestStatus{
							Digest: d,
							Status: exp.Classification(test, d).String(),
						})
						s = len(otherDigests) - 1
					} else {
						s = 8
					}
				}
			}
			p.Data = append(p.Data, Point{
				X: i,
				Y: y,
				S: s,
			})
		}
		sort.Sort(PointSlice(p.Data))
		ret = append(ret, p)
		y += 1
	}
	return ret, otherDigests
}
// calcStatus computes the per-corpus and overall triage status for the given
// tile and swaps it in as the current status.
func (s *StatusWatcher) calcStatus(tile *tiling.Tile) error {
	defer timer.New("Calc status timer:").Stop()

	minCommitId := map[string]int{}
	okByCorpus := map[string]bool{}

	expectations, err := s.storages.ExpectationsStore.Get()
	if err != nil {
		return err
	}

	// Gathers unique digests by corpus and label.
	byCorpus := map[string]map[types.Label]map[string]bool{}

	// Iterate over the current traces.
	tileLen := tile.LastCommitIndex() + 1
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)

		idx := tileLen - 1
		for (idx >= 0) && (gTrace.Values[idx] == types.MISSING_DIGEST) {
			idx--
		}

		// If this is an empty trace we ignore it for now.
		if idx == -1 {
			continue
		}

		// If this corpus doesn't exist yet, we initialize it.
		corpus := gTrace.Params()[types.CORPUS_FIELD]
		if _, ok := byCorpus[corpus]; !ok {
			minCommitId[corpus] = tileLen
			okByCorpus[corpus] = true
			byCorpus[corpus] = map[types.Label]map[string]bool{
				types.POSITIVE:  map[string]bool{},
				types.NEGATIVE:  map[string]bool{},
				types.UNTRIAGED: map[string]bool{},
			}

			if _, ok := corpusGauges[corpus]; !ok {
				corpusGauges[corpus] = map[types.Label]metrics.Gauge{
					types.UNTRIAGED: metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.UNTRIAGED, corpus), nil),
					types.POSITIVE:  metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.POSITIVE, corpus), nil),
					types.NEGATIVE:  metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.NEGATIVE, corpus), nil),
				}
			}
		}

		// Account for the corpus and test name.
		digest := gTrace.Values[idx]
		testName := gTrace.Params()[types.PRIMARY_KEY_FIELD]
		status := expectations.Classification(testName, digest)

		digestInfo, err := s.storages.GetOrUpdateDigestInfo(testName, digest, tile.Commits[idx])
		if err != nil {
			return err
		}

		okByCorpus[corpus] = okByCorpus[corpus] &&
			((status == types.POSITIVE) || ((status == types.NEGATIVE) && (len(digestInfo.IssueIDs) > 0)))
		minCommitId[corpus] = util.MinInt(idx, minCommitId[corpus])
		byCorpus[corpus][status][digest] = true
	}

	commits := tile.Commits[:tileLen]
	overallOk := true
	allUntriagedCount := 0
	allPositiveCount := 0
	allNegativeCount := 0
	corpStatus := make([]*GUICorpusStatus, 0, len(byCorpus))
	for corpus := range byCorpus {
		overallOk = overallOk && okByCorpus[corpus]
		untriagedCount := len(byCorpus[corpus][types.UNTRIAGED])
		positiveCount := len(byCorpus[corpus][types.POSITIVE])
		negativeCount := len(byCorpus[corpus][types.NEGATIVE])
		corpStatus = append(corpStatus, &GUICorpusStatus{
			Name:           corpus,
			OK:             okByCorpus[corpus],
			MinCommitHash:  commits[minCommitId[corpus]].Hash,
			UntriagedCount: untriagedCount,
			NegativeCount:  negativeCount,
		})
		allUntriagedCount += untriagedCount
		allNegativeCount += negativeCount
		allPositiveCount += positiveCount

		corpusGauges[corpus][types.POSITIVE].Update(int64(positiveCount))
		corpusGauges[corpus][types.NEGATIVE].Update(int64(negativeCount))
		corpusGauges[corpus][types.UNTRIAGED].Update(int64(untriagedCount))
	}

	allUntriagedGauge.Update(int64(allUntriagedCount))
	allPositiveGauge.Update(int64(allPositiveCount))
	allNegativeGauge.Update(int64(allNegativeCount))
	totalGauge.Update(int64(allUntriagedCount + allPositiveCount + allNegativeCount))

	sort.Sort(CorpusStatusSorter(corpStatus))

	// Swap out the current status.
	result := &GUIStatus{
		OK:         overallOk,
		LastCommit: commits[tileLen-1],
		CorpStatus: corpStatus,
	}
	s.mutex.Lock()
	s.current = result
	s.mutex.Unlock()

	return nil
}
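
// Illustrative helper (not part of the original code) mirroring the backwards
// scan in calcStatus: find the most recent non-missing digest in a
// GoldenTrace. It returns the commit index and digest, or -1 and "" if the
// trace is empty within the tile.
func latestDigestSketch(gTrace *types.GoldenTrace, tileLen int) (int, string) {
	for idx := tileLen - 1; idx >= 0; idx-- {
		if gTrace.Values[idx] != types.MISSING_DIGEST {
			return idx, gTrace.Values[idx]
		}
	}
	return -1, ""
}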