// CalculateClusterSummaries runs k-means clustering over the trace shapes.
func CalculateClusterSummaries(tile *tiling.Tile, k int, stddevThreshhold float64, filter Filter) (*ClusterSummaries, error) {
	lastCommitIndex := tile.LastCommitIndex()
	observations := make([]kmeans.Clusterable, 0, len(tile.Traces))
	for key, trace := range tile.Traces {
		if filter(key, trace.(*types.PerfTrace)) {
			observations = append(observations, ctrace.NewFullTrace(string(key), trace.(*types.PerfTrace).Values[:lastCommitIndex+1], trace.Params(), stddevThreshhold))
		}
	}
	if len(observations) == 0 {
		return nil, fmt.Errorf("Zero traces matched.")
	}

	// Create K starting centroids.
	centroids := chooseK(observations, k)
	// TODO(jcgregorio) Keep iterating until the total error stops changing.
	lastTotalError := 0.0
	for i := 0; i < MAX_KMEANS_ITERATIONS; i++ {
		centroids = kmeans.Do(observations, centroids, ctrace.CalculateCentroid)
		totalError := kmeans.TotalError(observations, centroids)
		glog.Infof("Total Error: %f\n", totalError)
		if math.Abs(totalError-lastTotalError) < KMEAN_EPSILON {
			break
		}
		lastTotalError = totalError
	}
	clusterSummaries := GetClusterSummaries(observations, centroids, tile.Commits)
	clusterSummaries.K = k
	clusterSummaries.StdDevThreshhold = stddevThreshhold
	return clusterSummaries, nil
}
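
A minimal call sketch (not part of the original source) for the function above. The Filter signature func(key string, tr *types.PerfTrace) bool is inferred from how filter is invoked in the loop, and the k and stddevThreshhold values are purely illustrative.

// clusterAllTraces is a hypothetical helper showing one way to invoke
// CalculateClusterSummaries: accept every trace and cluster into 50 groups.
func clusterAllTraces(tile *tiling.Tile) (*ClusterSummaries, error) {
	acceptAll := func(key string, tr *types.PerfTrace) bool { return true }
	return CalculateClusterSummaries(tile, 50, 0.001, acceptAll)
}
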
func forEachTestTraceDo(tile *tiling.Tile, fn func(string, []string)) {
	tileLen := tile.LastCommitIndex() + 1
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)
		testName := gTrace.Params()[types.PRIMARY_KEY_FIELD]
		fn(testName, gTrace.Values[:tileLen])
	}
}
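
A hypothetical use of forEachTestTraceDo (not in the original source), counting how many non-missing digests each test has recorded in the tile. The callback signature func(string, []string) comes directly from the function above.

// countDigestsPerTest tallies the non-missing digests for every test in the tile.
func countDigestsPerTest(tile *tiling.Tile) map[string]int {
	counts := map[string]int{}
	forEachTestTraceDo(tile, func(testName string, digests []string) {
		for _, d := range digests {
			if d != types.MISSING_DIGEST {
				counts[testName]++
			}
		}
	})
	return counts
}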
Example #3
// trimTile trims the Tile down to at most the last config.MAX_CLUSTER_COMMITS
// commits, or fewer if there aren't that many commits in the Tile.
func trimTile(tile *tiling.Tile) (*tiling.Tile, error) {
	end := tile.LastCommitIndex() + 1
	begin := end - config.MAX_CLUSTER_COMMITS
	if begin < 0 {
		begin = 0
	}
	return tile.Trim(begin, end)
}
Example #4
// searchTile queries across a tile.
func searchTile(q *Query, e *expstorage.Expectations, parsedQuery url.Values, storages *storage.Storage, tile *tiling.Tile, tallies *tally.Tallies, blamer *blame.Blamer, paramset *paramsets.Summary) ([]*Digest, []*tiling.Commit, error) {
	// TODO Use CommitRange to create a trimmed tile.

	traceTally := tallies.ByTrace()
	lastCommitIndex := tile.LastCommitIndex()

	// Loop over the tile and pull out all the digests that match
	// the query, collecting the matching traces as you go. Build
	// up a set of intermediates that can then be used to calculate
	// Digests.

	// map [test:digest] *intermediate
	inter := map[string]*intermediate{}
	for id, tr := range tile.Traces {
		if tiling.Matches(tr, parsedQuery) {
			test := tr.Params()[types.PRIMARY_KEY_FIELD]
			// Get all the digests
			digests := digestsFromTrace(id, tr, q.Head, lastCommitIndex, traceTally)
			for _, digest := range digests {
				cl := e.Classification(test, digest)
				if q.excludeClassification(cl) {
					continue
				}

				// Fix blamer to make this easier.
				if q.BlameGroupID != "" {
					if cl == types.UNTRIAGED {
						b := blamer.GetBlame(test, digest, tile.Commits)
						if q.BlameGroupID != blameGroupID(b, tile.Commits) {
							continue
						}
					} else {
						continue
					}
				}
				key := fmt.Sprintf("%s:%s", test, digest)
				if i, ok := inter[key]; !ok {
					inter[key] = newIntermediate(test, digest, id, tr, digests)
				} else {
					i.addTrace(id, tr, digests)
				}
			}
		}
	}
	// Now loop over all the intermediates and build a Digest for each one.
	ret := make([]*Digest, 0, len(inter))
	for key, i := range inter {
		parts := strings.Split(key, ":")
		ret = append(ret, digestFromIntermediate(parts[0], parts[1], i, e, tile, tallies, blamer, storages.DiffStore, paramset, q.IncludeIgnores))
	}
	return ret, tile.Commits, nil
}
Example #5
func (h *historian) processTile(tile *tiling.Tile) error {
	dStore := h.storages.DigestStore
	tileLen := tile.LastCommitIndex() + 1

	var digestInfo *digeststore.DigestInfo
	var ok bool
	counter := 0
	minMaxTimes := map[string]map[string]*digeststore.DigestInfo{}
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)
		testName := trace.Params()[types.PRIMARY_KEY_FIELD]
		for idx, digest := range gTrace.Values[:tileLen] {
			if digest != types.MISSING_DIGEST {
				timeStamp := tile.Commits[idx].CommitTime
				if digestInfo, ok = minMaxTimes[testName][digest]; !ok {
					digestInfo = &digeststore.DigestInfo{
						TestName: testName,
						Digest:   digest,
						First:    timeStamp,
						Last:     timeStamp,
					}

					if testVal, ok := minMaxTimes[testName]; !ok {
						minMaxTimes[testName] = map[string]*digeststore.DigestInfo{digest: digestInfo}
					} else {
						testVal[digest] = digestInfo
					}
					counter++
				} else {
					digestInfo.First = util.MinInt64(digestInfo.First, timeStamp)
					digestInfo.Last = util.MaxInt64(digestInfo.Last, timeStamp)
				}
			}
		}
	}

	digestInfos := make([]*digeststore.DigestInfo, 0, counter)
	for _, digests := range minMaxTimes {
		for _, digestInfo := range digests {
			digestInfos = append(digestInfos, digestInfo)
		}
	}

	return dStore.Update(digestInfos)
}
Example #6
// SkpCommits returns the indices for all the commits that contain SKP updates.
func (g *GitInfo) SkpCommits(tile *tiling.Tile) ([]int, error) {
	// Executes a git log command that looks like:
	//
	//   git log --format=format:%H  32956400b4d8f33394e2cdef9b66e8369ba2a0f3..e7416bfc9858bde8fc6eb5f3bfc942bc3350953a SKP_VERSION
	//
	// The output should be a \n separated list of hashes that match.
	first, last := tile.CommitRange()
	output, err := exec.RunCwd(g.dir, "git", "log", "--format=format:%H", first+".."+last, "SKP_VERSION")
	if err != nil {
		return nil, fmt.Errorf("SkpCommits: Failed to find git log of SKP_VERSION: %s", err)
	}
	hashes := strings.Split(output, "\n")

	ret := []int{}
	for i, c := range tile.Commits {
		if c.CommitTime != 0 && util.In(c.Hash, hashes) {
			ret = append(ret, i)
		}
	}
	return ret, nil
}
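
A hypothetical caller (not in the original source), logging the hash of every commit in the tile that touched SKP_VERSION; it only combines the indices returned above with tile.Commits.

// logSkpCommits prints the hashes of the commits that contain SKP updates.
func logSkpCommits(g *GitInfo, tile *tiling.Tile) error {
	indices, err := g.SkpCommits(tile)
	if err != nil {
		return err
	}
	for _, i := range indices {
		glog.Infof("SKP update at commit %s", tile.Commits[i].Hash)
	}
	return nil
}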
Example #7
// TileWithTryData will add all the trybot data for the given issue to the
// given Tile. A new Tile that is a copy of the original Tile will be returned,
// so we aren't modifying the underlying Tile.
func TileWithTryData(tile *tiling.Tile, issue string) (*tiling.Tile, error) {
	ret := tile.Copy()
	lastCommitIndex := tile.LastCommitIndex()
	// The way we handle Tiles, there is always empty space at the end of the
	// Tile past the last commit. Use that space to inject the trybot results.
	ret.Commits[lastCommitIndex+1].CommitTime = time.Now().Unix()
	lastCommitIndex = ret.LastCommitIndex()

	tryResults, err := Get(issue)
	if err != nil {
		return nil, fmt.Errorf("AppendToTile: Failed to retreive trybot results: %s", err)
	}
	// Copy in the trybot data.
	for k, v := range tryResults.Values {
		if tr, ok := ret.Traces[k]; !ok {
			continue
		} else {
			tr.(*types.PerfTrace).Values[lastCommitIndex] = v
		}
	}
	return ret, nil
}
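
A hypothetical call site (not in the original source); issue is whichever trybot issue identifier the caller wants overlaid. Note that only values whose trace keys already exist in the copied tile are filled in; trybot results for traces the tile does not contain are skipped.

// tileForIssue returns a copy of the tile with the trybot results for the
// given issue written into the empty slot at the end.
func tileForIssue(tile *tiling.Tile, issue string) (*tiling.Tile, error) {
	withTry, err := TileWithTryData(tile, issue)
	if err != nil {
		return nil, fmt.Errorf("tileForIssue: %s", err)
	}
	return withTry, nil
}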
Example #8
// updateBlame reads from the provided tile and updates the current
// blame lists.
func (b *Blamer) updateBlame(tile *tiling.Tile) error {
	exp, err := b.storages.ExpectationsStore.Get()
	if err != nil {
		return err
	}

	defer timer.New("blame").Stop()

	// Note: blameStart and blameEnd are continuously updated to contain the
	// smallest start and end index of the ranges for a testName/digest pair.
	blameStart := map[string]map[string]int{}
	blameEnd := map[string]map[string]int{}

	// blameRange stores the candidate ranges for a testName/digest pair.
	blameRange := map[string]map[string][][]int{}
	firstCommit := tile.Commits[0]
	tileLen := tile.LastCommitIndex() + 1
	ret := map[string]map[string]*BlameDistribution{}

	for _, trace := range tile.Traces {
		gtr := trace.(*types.GoldenTrace)
		testName := gtr.Params()[types.PRIMARY_KEY_FIELD]

		// lastIdx tracks the index of the last digest that is definitely
		// not in the blamelist.
		lastIdx := -1
		found := map[string]bool{}
		for idx, digest := range gtr.Values[:tileLen] {
			if digest == types.MISSING_DIGEST {
				continue
			}

			status := exp.Classification(testName, digest)
			if (status == types.UNTRIAGED) && !found[digest] {
				found[digest] = true

				var startIdx int
				endIdx := idx

				// If we have only seen empty digests, then we do not
				// consider any digest before the current one.
				if lastIdx == -1 {
					startIdx = idx
				} else {
					startIdx = lastIdx + 1
				}

				// Get the info about this digest.
				digestInfo, err := b.storages.GetOrUpdateDigestInfo(testName, digest, tile.Commits[idx])
				if err != nil {
					return err
				}

				// Check if the digest was first seen outside the current tile.
				isOld := digestInfo.First < firstCommit.CommitTime
				commitRange := []int{startIdx, endIdx}
				if blameStartFound, ok := blameStart[testName]; !ok {
					blameStart[testName] = map[string]int{digest: startIdx}
					blameEnd[testName] = map[string]int{digest: endIdx}
					blameRange[testName] = map[string][][]int{digest: [][]int{commitRange}}
					ret[testName] = map[string]*BlameDistribution{digest: &BlameDistribution{Old: isOld}}
				} else if currentStart, ok := blameStartFound[digest]; !ok {
					blameStart[testName][digest] = startIdx
					blameEnd[testName][digest] = endIdx
					blameRange[testName][digest] = [][]int{commitRange}
					ret[testName][digest] = &BlameDistribution{Old: isOld}
				} else {
					blameStart[testName][digest] = util.MinInt(currentStart, startIdx)
					blameEnd[testName][digest] = util.MinInt(blameEnd[testName][digest], endIdx)
					blameRange[testName][digest] = append(blameRange[testName][digest], commitRange)
					ret[testName][digest].Old = isOld || ret[testName][digest].Old
				}
			}
			lastIdx = idx
		}
	}

	commits := tile.Commits[:tileLen]
	for testName, digests := range blameRange {
		for digest, commitRanges := range digests {
			start := blameStart[testName][digest]
			end := blameEnd[testName][digest]

			freq := make([]int, len(commits)-start)
			for _, commitRange := range commitRanges {
				// If the commit range is nil, we cannot calculate a
				// blamelist.
				if commitRange == nil {
					freq = []int{}
					break
				}

				// Calculate the blame.
				idxEnd := util.MinInt(commitRange[1], end)
				for i := commitRange[0]; i <= idxEnd; i++ {
					freq[i-start]++
				}
			}

			ret[testName][digest].Freq = freq
		}
	}

	// Swap out the old blame lists for the new ones.
	b.mutex.Lock()
	b.testBlameLists, b.commits = ret, commits
	b.mutex.Unlock()
	return nil
}
Example #9
// buildTraces returns a populated *Traces for the given intermediate.
func buildTraces(test, digest string, inter *intermediate, e *expstorage.Expectations, tile *tiling.Tile, traceTally map[string]tally.Tally) *Traces {
	traceNames := make([]string, 0, len(inter.Traces))
	for id := range inter.Traces {
		traceNames = append(traceNames, id)
	}

	ret := &Traces{
		TileSize: len(tile.Commits),
		Traces:   []Trace{},
		Digests:  []DigestStatus{},
	}

	sort.Strings(traceNames)

	last := tile.LastCommitIndex()
	y := 0
	if len(traceNames) > 0 {
		ret.Digests = append(ret.Digests, DigestStatus{
			Digest: digest,
			Status: e.Classification(test, digest).String(),
		})
	}
	for _, id := range traceNames {
		t, ok := traceTally[id]
		if !ok {
			continue
		}
		if count, ok := t[digest]; !ok || count == 0 {
			continue
		}
		trace := inter.Traces[id].(*types.GoldenTrace)
		p := Trace{
			Data:   []Point{},
			ID:     id,
			Params: trace.Params(),
		}
		for i := last; i >= 0; i-- {
			if trace.IsMissing(i) {
				continue
			}
			// s is the status of the digest: 0 for a match, or [1-8] if not.
			s := 0
			if trace.Values[i] != digest {
				if index := digestIndex(trace.Values[i], ret.Digests); index != -1 {
					s = index
				} else {
					if len(ret.Digests) < 9 {
						d := trace.Values[i]
						ret.Digests = append(ret.Digests, DigestStatus{
							Digest: d,
							Status: e.Classification(test, d).String(),
						})
						s = len(ret.Digests) - 1
					} else {
						s = 8
					}
				}
			}
			p.Data = append(p.Data, Point{
				X: i,
				Y: y,
				S: s,
			})
		}
		sort.Sort(PointSlice(p.Data))
		ret.Traces = append(ret.Traces, p)
		y += 1
	}

	return ret
}
Example #10
// buildTraceData returns a populated []*Trace for all the traces that contain 'digest'.
func buildTraceData(digest string, traceNames []string, tile *tiling.Tile, traceTally map[string]tally.Tally, exp *expstorage.Expectations) ([]*Trace, []*DigestStatus) {
	sort.Strings(traceNames)
	ret := []*Trace{}
	last := tile.LastCommitIndex()
	y := 0

	// Keep track of the first 7 non-matching digests we encounter so we can color them differently.
	otherDigests := []*DigestStatus{}
	// Populate otherDigests with all the digests, including the one we are comparing against.
	if len(traceNames) > 0 {
		// Find the test name so we can look up the triage status.
		trace := tile.Traces[traceNames[0]].(*types.GoldenTrace)
		test := trace.Params()[types.PRIMARY_KEY_FIELD]
		otherDigests = append(otherDigests, &DigestStatus{
			Digest: digest,
			Status: exp.Classification(test, digest).String(),
		})
	}
	for _, id := range traceNames {
		t, ok := traceTally[id]
		if !ok {
			continue
		}
		if count, ok := t[digest]; !ok || count == 0 {
			continue
		}
		trace := tile.Traces[id].(*types.GoldenTrace)
		p := &Trace{
			Data:   []Point{},
			Label:  id,
			Params: trace.Params(),
		}
		for i := last; i >= 0; i-- {
			if trace.IsMissing(i) {
				continue
			}
			// s is the status of the digest: 0 for a match, or [1-8] if not.
			s := 0
			if trace.Values[i] != digest {
				if index := digestIndex(trace.Values[i], otherDigests); index != -1 {
					s = index
				} else {
					if len(otherDigests) < 9 {
						d := trace.Values[i]
						test := trace.Params()[types.PRIMARY_KEY_FIELD]
						otherDigests = append(otherDigests, &DigestStatus{
							Digest: d,
							Status: exp.Classification(test, d).String(),
						})
						s = len(otherDigests) - 1
					} else {
						s = 8
					}
				}
			}
			p.Data = append(p.Data, Point{
				X: i,
				Y: y,
				S: s,
			})
		}
		sort.Sort(PointSlice(p.Data))
		ret = append(ret, p)
		y += 1
	}

	return ret, otherDigests
}
Example #11
func (s *StatusWatcher) calcStatus(tile *tiling.Tile) error {
	defer timer.New("Calc status timer:").Stop()

	minCommitId := map[string]int{}
	okByCorpus := map[string]bool{}

	expectations, err := s.storages.ExpectationsStore.Get()
	if err != nil {
		return err
	}

	// Gathers unique digests by corpus and label.
	byCorpus := map[string]map[types.Label]map[string]bool{}

	// Iterate over the current traces
	tileLen := tile.LastCommitIndex() + 1
	for _, trace := range tile.Traces {
		gTrace := trace.(*types.GoldenTrace)

		idx := tileLen - 1
		for (idx >= 0) && (gTrace.Values[idx] == types.MISSING_DIGEST) {
			idx--
		}

		// If this is an empty trace we ignore it for now.
		if idx == -1 {
			continue
		}

		// If this corpus doesn't exist yet, we initialize it.
		corpus := gTrace.Params()[types.CORPUS_FIELD]
		if _, ok := byCorpus[corpus]; !ok {
			minCommitId[corpus] = tileLen
			okByCorpus[corpus] = true
			byCorpus[corpus] = map[types.Label]map[string]bool{
				types.POSITIVE:  map[string]bool{},
				types.NEGATIVE:  map[string]bool{},
				types.UNTRIAGED: map[string]bool{},
			}

			if _, ok := corpusGauges[corpus]; !ok {
				corpusGauges[corpus] = map[types.Label]metrics.Gauge{
					types.UNTRIAGED: metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.UNTRIAGED, corpus), nil),
					types.POSITIVE:  metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.POSITIVE, corpus), nil),
					types.NEGATIVE:  metrics.NewRegisteredGauge(fmt.Sprintf(METRIC_CORPUS_TMPL, types.NEGATIVE, corpus), nil),
				}
			}
		}

		// Account for the corpus and testname.
		digest := gTrace.Values[idx]
		testName := gTrace.Params()[types.PRIMARY_KEY_FIELD]
		status := expectations.Classification(testName, digest)

		digestInfo, err := s.storages.GetOrUpdateDigestInfo(testName, digest, tile.Commits[idx])
		if err != nil {
			return err
		}

		okByCorpus[corpus] = okByCorpus[corpus] && ((status == types.POSITIVE) ||
			((status == types.NEGATIVE) && (len(digestInfo.IssueIDs) > 0)))
		minCommitId[corpus] = util.MinInt(idx, minCommitId[corpus])
		byCorpus[corpus][status][digest] = true
	}

	commits := tile.Commits[:tileLen]
	overallOk := true
	allUntriagedCount := 0
	allPositiveCount := 0
	allNegativeCount := 0
	corpStatus := make([]*GUICorpusStatus, 0, len(byCorpus))
	for corpus := range byCorpus {
		overallOk = overallOk && okByCorpus[corpus]
		untriagedCount := len(byCorpus[corpus][types.UNTRIAGED])
		positiveCount := len(byCorpus[corpus][types.POSITIVE])
		negativeCount := len(byCorpus[corpus][types.NEGATIVE])
		corpStatus = append(corpStatus, &GUICorpusStatus{
			Name:           corpus,
			OK:             okByCorpus[corpus],
			MinCommitHash:  commits[minCommitId[corpus]].Hash,
			UntriagedCount: untriagedCount,
			NegativeCount:  negativeCount,
		})
		allUntriagedCount += untriagedCount
		allNegativeCount += negativeCount
		allPositiveCount += positiveCount

		corpusGauges[corpus][types.POSITIVE].Update(int64(positiveCount))
		corpusGauges[corpus][types.NEGATIVE].Update(int64(negativeCount))
		corpusGauges[corpus][types.UNTRIAGED].Update(int64(untriagedCount))
	}
	allUntriagedGauge.Update(int64(allUntriagedCount))
	allPositiveGauge.Update(int64(allPositiveCount))
	allNegativeGauge.Update(int64(allNegativeCount))
	totalGauge.Update(int64(allUntriagedCount + allPositiveCount + allNegativeCount))

	sort.Sort(CorpusStatusSorter(corpStatus))

	// Swap out the current status.
	result := &GUIStatus{
		OK:         overallOk,
		LastCommit: commits[tileLen-1],
		CorpStatus: corpStatus,
	}
	s.mutex.Lock()
	s.current = result
	s.mutex.Unlock()

	return nil
}