// MasterAlerts returns alerts generated from the master.
func (a *Analyzer) MasterAlerts(master string, be *messages.BuildExtract) []messages.Alert {
	ret := []messages.Alert{}

	// Copied logic from builder_messages.
	// No created_timestamp should be a warning sign, no?
	if be.CreatedTimestamp == messages.EpochTime(0) {
		return ret
	}

	elapsed := a.Now().Sub(be.CreatedTimestamp.Time())
	if elapsed > a.StaleMasterThreshold {
		ret = append(ret, messages.Alert{
			Key:       fmt.Sprintf("stale master: %v", master),
			Title:     fmt.Sprintf("Stale %s master data", master),
			Body:      fmt.Sprintf("%s elapsed since last update.", elapsed),
			StartTime: messages.TimeToEpochTime(be.CreatedTimestamp.Time()),
			Severity:  staleMasterSev,
			Time:      messages.TimeToEpochTime(a.Now()),
			Links:     []messages.Link{{"Master", client.MasterURL(master)}},
			// No type or extension for now.
		})
	}
	if elapsed < 0 {
		// Add this to the alerts returned, rather than just log it?
		log.Errorf("Master %s timestamp is newer than current time (%s): %s old.", master, a.Now(), elapsed)
	}

	return ret
}
func TestBuilderAlerts(t *testing.T) {
	tests := []struct {
		name         string
		url          string
		be           messages.BuildExtract
		filter       string
		t            time.Time
		wantBuilders []messages.Alert
		wantMasters  []messages.Alert
	}{
		{
			name:         "Empty",
			url:          "https://build.chromium.org/p/fake.master/json",
			wantBuilders: []messages.Alert{},
			wantMasters:  []messages.Alert{},
		},
		{
			name: "No Alerts",
			url:  "https://build.chromium.org/p/fake.master/json",
			be: messages.BuildExtract{
				CreatedTimestamp: messages.EpochTime(100),
			},
			t:            time.Unix(100, 0),
			wantBuilders: []messages.Alert{},
			wantMasters:  []messages.Alert{},
		},
	}

	a := New(&mockReader{}, 0, 10)
	for _, test := range tests {
		a.now = fakeNow(test.t)
		got := a.BuilderAlerts(test.url, &test.be)
		if !reflect.DeepEqual(got, test.wantBuilders) {
			t.Errorf("%s failed. Got %+v, want: %+v", test.name, got, test.wantBuilders)
		}
	}
}
func TestMasterAlerts(t *testing.T) {
	tests := []struct {
		name   string
		master string
		be     messages.BuildExtract
		t      time.Time
		want   []messages.Alert
	}{
		{
			name:   "empty",
			master: "fake-empty",
			want:   []messages.Alert{},
		},
		{
			name:   "Not stale master",
			master: "fake-not-stale",
			be: messages.BuildExtract{
				CreatedTimestamp: messages.EpochTime(100),
			},
			t:    time.Unix(100, 0),
			want: []messages.Alert{},
		},
		{
			name:   "Stale master",
			master: "fake.master",
			be: messages.BuildExtract{
				CreatedTimestamp: messages.EpochTime(100),
			},
			t: time.Unix(100, 0).Add(20 * time.Minute),
			want: []messages.Alert{
				{
					Key:       "stale master: fake.master",
					Title:     "Stale fake.master master data",
					Body:      fmt.Sprintf("%s elapsed since last update.", 20*time.Minute),
					Time:      messages.TimeToEpochTime(time.Unix(100, 0).Add(20 * time.Minute)),
					Links:     []messages.Link{{"Master", client.MasterURL("fake.master")}},
					StartTime: messages.EpochTime(100),
				},
			},
		},
		{
			name:   "Future master",
			master: "fake.master",
			be: messages.BuildExtract{
				CreatedTimestamp: messages.EpochTime(110),
			},
			t:    time.Unix(100, 0),
			want: []messages.Alert{},
		},
	}

	a := New(&mockReader{}, 0, 10)
	for _, test := range tests {
		a.now = fakeNow(test.t)
		got := a.MasterAlerts(test.master, &test.be)
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("%s failed. Got %+v, want: %+v", test.name, got, test.want)
		}
	}
}
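// The tests above rely on fakeNow and mockReader helpers that are not shown in this excerpt.
// A minimal sketch of fakeNow, assuming the Analyzer's now field has type func() time.Time;
// the real helper in the test file may differ:
func fakeNow(t time.Time) func() time.Time {
	return func() time.Time {
		return t
	}
}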
// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	ret := []messages.Alert{}
	type res struct {
		f   stepFailure
		a   *messages.Alert
		err error
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))
	scannedFailures := []stepFailure{}
	for _, failure := range failures {
		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		if failure.step.Name == "steps" {
			// Check results to see if the first element is resInfraFailure.
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			// The actual breaking step will appear later.
			continue
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		// getCommitPos returns the named commit position from the build's gnumbd metadata.
		getCommitPos := func(b messages.Build, name string) (string, bool) {
			for _, p := range b.Properties {
				if p[0] == name {
					s, ok := p[1].(string)
					return s, ok
				}
			}
			return "", false
		}

		scannedFailures = append(scannedFailures, failure)
		go func(f stepFailure) {
			alr := messages.Alert{
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.Now().Unix()),
				Type:  "buildfailure",
			}

			regRanges := []messages.RegressionRange{}
			revisionsByRepo := map[string][]string{}

			// Get gnumbd sequence numbers for whatever this build pulled in.
			chromiumPos, ok := getCommitPos(f.build, "got_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "chromium",
					Positions: []string{chromiumPos},
				})
			}

			blinkPos, ok := getCommitPos(f.build, "got_webkit_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "blink",
					Positions: []string{blinkPos},
				})
			}

			v8Pos, ok := getCommitPos(f.build, "got_v8_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "v8",
					Positions: []string{v8Pos},
				})
			}

			naclPos, ok := getCommitPos(f.build, "got_nacl_revision_cp")
			if ok {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      "nacl",
					Positions: []string{naclPos},
				})
			}

			for _, change := range f.build.SourceStamp.Changes {
				revisionsByRepo[change.Repository] = append(revisionsByRepo[change.Repository], change.Revision)
				// change.Revision is *not* always a git hash; sometimes it is a position from gnumbd.
				// What determines which one we get isn't obvious at this point.
				// A potential problem here is when multiple repos have overlapping gnumbd ranges.
				a.revisionSummaries[change.Revision] = messages.RevisionSummary{
					GitHash:     change.Revision,
					Link:        change.Revlink,
					Description: trunc(change.Comments),
					Author:      change.Who,
					When:        change.When,
				}
			}

			for repo, revisions := range revisionsByRepo {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revisions,
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// That range is tracked in FirstFailure and LatestFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			reasons := a.reasonsForFailure(f)
			for _, r := range reasons {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			alr.Key = alertKey(f.masterName, f.builderName, f.step.Name)
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{
				f:   f,
				a:   &alr,
				err: nil,
			}
		}(failure)
	}

	for range scannedFailures {
		r := <-rs
		if r.a != nil {
			ret = append(ret, *r.a)
		}
	}

	return ret, nil
}
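// trunc is used above (Description: trunc(change.Comments)) but isn't defined in this excerpt.
// A minimal sketch, assuming it simply caps the description length; the real cutoff and
// behavior may differ:
func trunc(s string) string {
	const maxLen = 100 // assumed limit, not taken from the original source
	if len(s) <= maxLen {
		return s
	}
	return s[:maxLen]
}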
// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	ret := []messages.Alert{}
	type res struct {
		f   stepFailure
		a   *messages.Alert
		err error
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))
	scannedFailures := []stepFailure{}
	for _, failure := range failures {
		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		if failure.step.Name == "steps" {
			// Check results to see if the first element is resInfraFailure.
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			// The actual breaking step will appear later.
			continue
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		scannedFailures = append(scannedFailures, failure)
		go func(f stepFailure) {
			alr := messages.Alert{
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.now().Unix()),
				Type:  "buildfailure",
			}

			regRanges := []messages.RegressionRange{}
			revsByRepo := map[string][]string{}
			for _, change := range f.build.SourceStamp.Changes {
				// Check change.Comments for text like
				// "Cr-Commit-Position: refs/heads/master@{#330158}" to pick out revs from git commits.
				revsByRepo[change.Repository] = append(revsByRepo[change.Repository], change.Revision)
			}
			for repo, revs := range revsByRepo {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revs,
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// That range is tracked in FirstFailure and LatestFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			reasons := a.reasonsForFailure(f)
			for _, r := range reasons {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			alr.Key = alertKey(f.masterName, f.builderName, f.step.Name)
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{
				f:   f,
				a:   &alr,
				err: nil,
			}
		}(failure)
	}

	for range scannedFailures {
		r := <-rs
		if r.a != nil {
			ret = append(ret, *r.a)
		}
	}

	return ret, nil
}
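// The comment in the SourceStamp.Changes loop above suggests picking commit positions out of
// lines like "Cr-Commit-Position: refs/heads/master@{#330158}" in change.Comments. A minimal
// sketch of such a parser (a hypothetical helper, not part of the original source; it assumes
// the standard library regexp and strconv packages are imported):
var crCommitPosRE = regexp.MustCompile(`Cr-Commit-Position: (\S+)@\{#(\d+)\}`)

// parseCommitPosition returns the ref and numeric position from a commit message,
// or ok == false if no Cr-Commit-Position footer is present.
func parseCommitPosition(comments string) (ref string, pos int, ok bool) {
	m := crCommitPosRE.FindStringSubmatch(comments)
	if m == nil {
		return "", 0, false
	}
	n, err := strconv.Atoi(m[2])
	if err != nil {
		return "", 0, false
	}
	return m[1], n, true
}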