// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	ret := []messages.Alert{}
	type res struct {
		f   stepFailure
		a   *messages.Alert
		err error
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))

	scannedFailures := []stepFailure{}
	for _, failure := range failures {
		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		if failure.step.Name == "steps" {
			// Check results to see if it's an array of [4].
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			continue // The actual breaking step will appear later.
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		// getCommitPos returns the named commit position from the build's gnumbd metadata properties.
		getCommitPos := func(b messages.Build, name string) (string, bool) {
			for _, p := range b.Properties {
				if p[0] == name {
					s, ok := p[1].(string)
					return s, ok
				}
			}
			return "", false
		}

		scannedFailures = append(scannedFailures, failure)
		go func(f stepFailure) {
			alr := messages.Alert{
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.Now().Unix()),
				Type:  "buildfailure",
			}

			regRanges := []messages.RegressionRange{}
			revisionsByRepo := map[string][]string{}

			// Get gnumbd sequence numbers for whatever this build pulled in.
			chromiumPos, ok := getCommitPos(f.build, "got_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "chromium",
					Positions: []string{chromiumPos},
				})
			}

			blinkPos, ok := getCommitPos(f.build, "got_webkit_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "blink",
					Positions: []string{blinkPos},
				})
			}

			v8Pos, ok := getCommitPos(f.build, "got_v8_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "v8",
					Positions: []string{v8Pos},
				})
			}

			naclPos, ok := getCommitPos(f.build, "got_nacl_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "nacl",
					Positions: []string{naclPos},
				})
			}

			for _, change := range f.build.SourceStamp.Changes {
				revisionsByRepo[change.Repository] = append(revisionsByRepo[change.Repository], change.Revision)
				// change.Revision is *not* always a git hash; sometimes it is a gnumbd commit position.
				// It isn't clear yet what determines which form a given change uses, and overlapping
				// gnumbd ranges across repos could be a problem here.
				a.revisionSummaries[change.Revision] = messages.RevisionSummary{
					GitHash:     change.Revision,
					Link:        change.Revlink,
					Description: trunc(change.Comments),
					Author:      change.Who,
					When:        change.When,
				}
			}

			for repo, revisions := range revisionsByRepo {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revisions,
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// These are set in FirstFailure and LatestFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			reasons := a.reasonsForFailure(f)
			for _, r := range reasons {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			alr.Key = alertKey(f.masterName, f.builderName, f.step.Name)
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{
				f:   f,
				a:   &alr,
				err: nil,
			}
		}(failure)
	}

	for range scannedFailures {
		r := <-rs
		if r.a != nil {
			ret = append(ret, *r.a)
		}
	}

	return ret, nil
}
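// Illustrative sketch, not part of the original code: the getCommitPos closure
// above assumes Buildbot-style build properties, which typically arrive as
// [name, value, source] triples. A standalone, hypothetical equivalent over a
// plain property slice might look like the function below; its name and the
// example property value are assumptions for illustration only.
//
//	props := [][]interface{}{
//		{"got_revision_cp", "refs/heads/master@{#330158}", "annotator"},
//	}
//	pos, ok := getCommitPosFromProps(props, "got_revision_cp")
//	// pos == "refs/heads/master@{#330158}", ok == true
func getCommitPosFromProps(props [][]interface{}, name string) (string, bool) {
	for _, p := range props {
		if len(p) < 2 {
			continue
		}
		if key, ok := p[0].(string); ok && key == name {
			s, ok := p[1].(string)
			return s, ok
		}
	}
	return "", false
}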
// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	ret := []messages.Alert{}
	type res struct {
		f   stepFailure
		a   *messages.Alert
		err error
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))

	scannedFailures := []stepFailure{}
	for _, failure := range failures {
		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		if failure.step.Name == "steps" {
			// Check results to see if it's an array of [4].
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			continue // The actual breaking step will appear later.
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		scannedFailures = append(scannedFailures, failure)
		go func(f stepFailure) {
			alr := messages.Alert{
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.now().Unix()),
				Type:  "buildfailure",
			}

			regRanges := []messages.RegressionRange{}
			revsByRepo := map[string][]string{}
			for _, change := range f.build.SourceStamp.Changes {
				// Check change.Comments for text like
				// "Cr-Commit-Position: refs/heads/master@{#330158}" to pick out revs from git commits.
				revsByRepo[change.Repository] = append(revsByRepo[change.Repository], change.Revision)
			}

			for repo, revs := range revsByRepo {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revs,
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// These are set in FirstFailure and LatestFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			reasons := a.reasonsForFailure(f)
			for _, r := range reasons {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			alr.Key = alertKey(f.masterName, f.builderName, f.step.Name)
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{
				f:   f,
				a:   &alr,
				err: nil,
			}
		}(failure)
	}

	for range scannedFailures {
		r := <-rs
		if r.a != nil {
			ret = append(ret, *r.a)
		}
	}

	return ret, nil
}
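// Sketch only: the comment in the revised loop above suggests parsing commit
// positions such as "Cr-Commit-Position: refs/heads/master@{#330158}" out of
// change.Comments. One way to do that is a regexp over the commit message
// footer; commitPositionRE and parseCommitPosition are hypothetical names, not
// part of the original code, and this requires the standard "regexp" package.
var commitPositionRE = regexp.MustCompile(`Cr-Commit-Position: (refs/[^@\s]+)@\{#(\d+)\}`)

// parseCommitPosition returns the ref and numeric position from a commit
// message footer, or ok == false if no Cr-Commit-Position footer is present.
func parseCommitPosition(comments string) (ref, pos string, ok bool) {
	m := commitPositionRE.FindStringSubmatch(comments)
	if m == nil {
		return "", "", false
	}
	return m[1], m[2], true
}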