Beispiel #1
0
// MasterAlerts returns alerts generated from the master.
//
// master is the master's name; it is used in the alert key, title and link.
// be is the build extract whose CreatedTimestamp is compared against the
// analyzer's clock. One "stale master" alert is returned when more than
// a.StaleMasterThreshold has elapsed since that timestamp. An extract with a
// zero CreatedTimestamp yields no alerts, and a timestamp later than the
// current time is only logged as an error. The returned slice is never nil.
func (a *Analyzer) MasterAlerts(master string, be *messages.BuildExtract) []messages.Alert {
	ret := []messages.Alert{}

	// Copied logic from builder_messages.
	// No created_timestamp should be a warning sign, no?
	if be.CreatedTimestamp == messages.EpochTime(0) {
		return ret
	}

	// Read the clock exactly once so that the elapsed duration, the alert's
	// Time and the log message below all describe the same instant.
	now := a.Now()
	created := be.CreatedTimestamp.Time()
	elapsed := now.Sub(created)

	if elapsed > a.StaleMasterThreshold {
		ret = append(ret, messages.Alert{
			Key:       fmt.Sprintf("stale master: %v", master),
			Title:     fmt.Sprintf("Stale %s master data", master),
			Body:      fmt.Sprintf("%s elapsed since last update.", elapsed),
			StartTime: messages.TimeToEpochTime(created),
			Severity:  staleMasterSev,
			Time:      messages.TimeToEpochTime(now),
			Links:     []messages.Link{{"Master", client.MasterURL(master)}},
			// No type or extension for now.
		})
	}
	if elapsed < 0 {
		// Add this to the alerts returned, rather than just log it?
		log.Errorf("Master %s timestamp is newer than current time (%s): %s old.", master, now, elapsed)
	}

	return ret
}
Beispiel #2
0
// TestBuilderAlerts feeds BuilderAlerts a table of build extracts under a
// faked clock and compares the returned builder alerts with the expectation.
//
// Only wantBuilders is asserted; the filter and wantMasters fields are
// carried by the table but not consulted by the loop below.
func TestBuilderAlerts(t *testing.T) {
	type testCase struct {
		name         string
		url          string
		be           messages.BuildExtract
		filter       string
		t            time.Time
		wantBuilders []messages.Alert
		wantMasters  []messages.Alert
	}

	const masterURL = "https://build.chromium.org/p/fake.master/json"

	cases := []testCase{
		{
			name:         "Empty",
			url:          masterURL,
			wantBuilders: []messages.Alert{},
			wantMasters:  []messages.Alert{},
		},
		{
			name:         "No Alerts",
			url:          masterURL,
			be:           messages.BuildExtract{CreatedTimestamp: messages.EpochTime(100)},
			t:            time.Unix(100, 0),
			wantBuilders: []messages.Alert{},
			wantMasters:  []messages.Alert{},
		},
	}

	analyzer := New(&mockReader{}, 0, 10)

	for i := range cases {
		tc := &cases[i]

		// Pin the analyzer's clock to this case's instant before analyzing.
		analyzer.now = fakeNow(tc.t)

		if got := analyzer.BuilderAlerts(tc.url, &tc.be); !reflect.DeepEqual(got, tc.wantBuilders) {
			t.Errorf("%s failed. Got %+v, want: %+v", tc.name, got, tc.wantBuilders)
		}
	}
}
Beispiel #3
0
// TestMasterAlerts runs MasterAlerts against build extracts under a faked
// clock, covering an empty extract, a fresh one, a stale one and one stamped
// later than the faked current time.
func TestMasterAlerts(t *testing.T) {
	type testCase struct {
		name   string
		master string
		be     messages.BuildExtract
		t      time.Time
		want   []messages.Alert
	}

	// The faked "now" is epoch second 100 in every case except the stale
	// one, which looks at its extract twenty minutes later.
	const staleAfter = 20 * time.Minute
	base := time.Unix(100, 0)
	staleNow := base.Add(staleAfter)

	// NOTE(review): Severity is left at its zero value in this expectation —
	// confirm that matches what MasterAlerts sets on a stale-master alert.
	staleAlert := messages.Alert{
		Key:       "stale master: fake.master",
		Title:     "Stale fake.master master data",
		Body:      fmt.Sprintf("%s elapsed since last update.", staleAfter),
		Time:      messages.TimeToEpochTime(staleNow),
		Links:     []messages.Link{{"Master", client.MasterURL("fake.master")}},
		StartTime: messages.EpochTime(100),
	}

	cases := []testCase{
		{
			name:   "empty",
			master: "fake-empty",
			want:   []messages.Alert{},
		},
		{
			name:   "Not stale master",
			master: "fake-not-stale",
			be:     messages.BuildExtract{CreatedTimestamp: messages.EpochTime(100)},
			t:      base,
			want:   []messages.Alert{},
		},
		{
			name:   "Stale master",
			master: "fake.master",
			be:     messages.BuildExtract{CreatedTimestamp: messages.EpochTime(100)},
			t:      staleNow,
			want:   []messages.Alert{staleAlert},
		},
		{
			name:   "Future master",
			master: "fake.master",
			be:     messages.BuildExtract{CreatedTimestamp: messages.EpochTime(110)},
			t:      base,
			want:   []messages.Alert{},
		},
	}

	analyzer := New(&mockReader{}, 0, 10)

	for i := range cases {
		tc := &cases[i]

		// Pin the analyzer's clock to this case's instant before analyzing.
		analyzer.now = fakeNow(tc.t)

		if got := analyzer.MasterAlerts(tc.master, &tc.be); !reflect.DeepEqual(got, tc.want) {
			t.Errorf("%s failed. Got %+v, want: %+v", tc.name, got, tc.want)
		}
	}
}
Beispiel #4
0
// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
//
// Failures of the "steps" pseudo-step and failures rejected by
// a.excludeFailure produce no alert. Every remaining failure yields exactly
// one "buildfailure" alert, and the alerts are returned in the same relative
// order as their failures appear in the input. As a side effect, a revision
// summary for every change of every alerted build is stored in
// a.revisionSummaries. The returned slice is never nil and the returned error
// is currently always nil.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	// res carries one finished alert together with the slot it belongs in, so
	// the output order does not depend on which goroutine finishes first.
	type res struct {
		idx int
		alr messages.Alert
	}

	// Gets the named revision number from gnumbd metadata.
	// Property entries too short to hold a name and a value are skipped.
	getCommitPos := func(b messages.Build, name string) (string, bool) {
		for _, p := range b.Properties {
			if len(p) < 2 {
				continue
			}
			if p[0] == name {
				s, ok := p[1].(string)
				return s, ok
			}
		}
		return "", false
	}

	// Build properties that carry a gnumbd commit position, paired with the
	// repo each one describes. Ranges are emitted in this order.
	commitPosProps := []struct{ repo, prop string }{
		{"chromium", "got_revision_cp"},
		{"blink", "got_webkit_revision_cp"},
		{"v8", "got_v8_revision_cp"},
		{"nacl", "got_nacl_revision_cp"},
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))

	scanned := 0
	for _, failure := range failures {
		if failure.step.Name == "steps" {
			// check results to see if it's an array of [4]
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			// No alert for "steps" itself: the actual breaking step will
			// appear later.
			continue
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		// Record revision summaries here, on the calling goroutine, rather
		// than inside the workers below: the map is shared by all of them and
		// unsynchronized concurrent map writes are a data race.
		for _, change := range failure.build.SourceStamp.Changes {
			// change.Revision is *not* always a git hash. Sometimes it is a position from gnumbd.
			// change.Revision is git hash or gnumbd depending on what exactly? Not obvious at this time.
			// A potential problem here is when multiple repos have overlapping gnumbd ranges.
			a.revisionSummaries[change.Revision] = messages.RevisionSummary{
				GitHash:     change.Revision,
				Link:        change.Revlink,
				Description: trunc(change.Comments),
				Author:      change.Who,
				When:        change.When,
			}
		}

		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		go func(idx int, f stepFailure) {
			alr := messages.Alert{
				Key:   alertKey(f.masterName, f.builderName, f.step.Name),
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.Now().Unix()),
				Type:  "buildfailure",
			}

			regRanges := []messages.RegressionRange{}

			// Get gnumbd sequence numbers for whatever this build pulled in.
			for _, cp := range commitPosProps {
				if pos, ok := getCommitPos(f.build, cp.prop); ok {
					regRanges = append(regRanges, messages.RegressionRange{
						Repo:      cp.repo,
						Positions: []string{pos},
					})
				}
			}

			// Group the changed revisions by repository. repoOrder remembers
			// the order in which repositories were first seen so the ranges
			// come out in a stable order instead of map iteration order.
			revisionsByRepo := map[string][]string{}
			var repoOrder []string
			for _, change := range f.build.SourceStamp.Changes {
				if _, seen := revisionsByRepo[change.Repository]; !seen {
					repoOrder = append(repoOrder, change.Repository)
				}
				revisionsByRepo[change.Repository] = append(revisionsByRepo[change.Repository], change.Revision)
			}
			for _, repo := range repoOrder {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revisionsByRepo[repo],
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// These are set in FirstFailure and LastFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			for _, r := range a.reasonsForFailure(f) {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			// Always report at least one reason, even when none could be
			// extracted, so the alert still points at the failing step.
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{idx: idx, alr: alr}
		}(scanned, failure)
		scanned++
	}

	// Exactly one result arrives per spawned goroutine; place each in its slot.
	ret := make([]messages.Alert, scanned)
	for i := 0; i < scanned; i++ {
		r := <-rs
		ret[r.idx] = r.alr
	}

	return ret, nil
}
Beispiel #5
0
// stepFailureAlerts returns alerts generated from step failures. It applies filtering
// logic specified in the gatekeeper config to ignore some failures.
//
// Failures of the "steps" pseudo-step and failures rejected by
// a.excludeFailure produce no alert. Every remaining failure yields exactly
// one "buildfailure" alert, and the alerts are returned in the same relative
// order as their failures appear in the input. The returned slice is never
// nil and the returned error is currently always nil.
func (a *Analyzer) stepFailureAlerts(failures []stepFailure) ([]messages.Alert, error) {
	// res carries one finished alert together with the slot it belongs in, so
	// the output order does not depend on which goroutine finishes first.
	type res struct {
		idx int
		alr messages.Alert
	}

	// Might not need full capacity buffer, since some failures are ignored below.
	rs := make(chan res, len(failures))

	scanned := 0
	for _, failure := range failures {
		if failure.step.Name == "steps" {
			// check results to see if it's an array of [4]
			// That's a purple failure, which should go to infra/trooper.
			log.Infof("steps results: %+v", failure.step)
			if len(failure.step.Results) > 0 {
				if r, ok := failure.step.Results[0].(float64); ok && r == resInfraFailure {
					// TODO: Create a trooper alert about this.
					log.Errorf("INFRA FAILURE: %+v", failure)
				}
			}
			// No alert for "steps" itself: the actual breaking step will
			// appear later.
			continue
		}

		// Check the gatekeeper configs to see if this is ignorable.
		if a.excludeFailure(failure.masterName, failure.builderName, failure.step.Name) {
			continue
		}

		// goroutine/channel because the reasonsForFailure call potentially
		// blocks on IO.
		go func(idx int, f stepFailure) {
			alr := messages.Alert{
				Key:   alertKey(f.masterName, f.builderName, f.step.Name),
				Title: fmt.Sprintf("Builder step failure: %s.%s", f.masterName, f.builderName),
				Time:  messages.EpochTime(a.now().Unix()),
				Type:  "buildfailure",
			}

			// Group the changed revisions by repository. repoOrder remembers
			// the order in which repositories were first seen so the ranges
			// come out in a stable order instead of map iteration order.
			revsByRepo := map[string][]string{}
			var repoOrder []string
			for _, change := range f.build.SourceStamp.Changes {
				// check change.Comments for text like
				// "Cr-Commit-Position: refs/heads/master@{#330158}" to pick out revs from git commits.
				if _, seen := revsByRepo[change.Repository]; !seen {
					repoOrder = append(repoOrder, change.Repository)
				}
				revsByRepo[change.Repository] = append(revsByRepo[change.Repository], change.Revision)
			}

			regRanges := []messages.RegressionRange{}
			for _, repo := range repoOrder {
				regRanges = append(regRanges, messages.RegressionRange{
					Repo:      repo,
					Revisions: revsByRepo[repo],
				})
			}

			// If the builder has been failing on the same step for multiple builds in a row,
			// we should have only one alert but indicate the range of builds affected.
			// These are set in FirstFailure and LastFailure.
			bf := messages.BuildFailure{
				// FIXME: group builders?
				Builders: []messages.AlertedBuilder{
					{
						Name:          f.builderName,
						URL:           client.BuilderURL(f.masterName, f.builderName),
						StartTime:     f.build.CreatedTimestamp,
						FirstFailure:  f.build.Number,
						LatestFailure: f.build.Number,
					},
				},
				TreeCloser:       a.wouldCloseTree(f.masterName, f.builderName, f.step.Name),
				RegressionRanges: regRanges,
			}

			for _, r := range a.reasonsForFailure(f) {
				bf.Reasons = append(bf.Reasons, messages.Reason{
					TestName: r,
					Step:     f.step.Name,
					URL:      f.URL(),
				})
			}

			// Always report at least one reason, even when none could be
			// extracted, so the alert still points at the failing step.
			if len(bf.Reasons) == 0 {
				log.Warningf("No reasons for step failure: %s", alr.Key)
				bf.Reasons = append(bf.Reasons, messages.Reason{
					Step: f.step.Name,
					URL:  f.URL(),
				})
			}

			alr.Extension = bf

			rs <- res{idx: idx, alr: alr}
		}(scanned, failure)
		scanned++
	}

	// Exactly one result arrives per spawned goroutine; place each in its slot.
	ret := make([]messages.Alert, scanned)
	for i := 0; i < scanned; i++ {
		r := <-rs
		ret[r.idx] = r.alr
	}

	return ret, nil
}