Exemplo n.º 1
0
// URL returns a url to builder step failure page.
func (f stepFailure) URL() string {
	return client.StepURL(f.masterName, f.builderName, f.step.Name, f.build.Number)
}
Exemplo n.º 2
0
// TODO: also check the build slaves to see if there are alerts for currently running builds that
// haven't shown up in CBE yet.
func (a *Analyzer) builderAlerts(masterName string, builderName string, b *messages.Builder) ([]messages.Alert, []error) {
	if len(b.CachedBuilds) == 0 {
		// TODO: Make an alert for this?
		return nil, []error{errNoRecentBuilds}
	}

	recentBuildIDs := b.CachedBuilds
	// Should be a *reverse* sort.
	sort.Sort(buildNums(recentBuildIDs))
	if len(recentBuildIDs) > a.MaxRecentBuilds {
		recentBuildIDs = recentBuildIDs[:a.MaxRecentBuilds]
	}

	alerts, errs := []messages.Alert{}, []error{}

	lastBuild, lastCompletedBuild, err := a.lastBuilds(masterName, builderName, recentBuildIDs)
	if err != nil {
		errs = append(errs, err)
		return nil, errs
	}

	// Examining only the latest build is probably suboptimal since if it's still in progress it might
	// not have hit a step that is going to fail and has failed repeatedly for the last few builds.
	// AKA "Reliable failures".  TODO: Identify "Reliable failures"
	lastStep, lastUpdated, err := a.latestBuildStep(lastBuild)
	if err != nil {
		errs = append(errs, fmt.Errorf("Couldn't get latest build step for %s.%s: %v", masterName, builderName, err))
		return alerts, errs
	}
	elapsed := a.Now().Sub(lastUpdated.Time())
	links := []messages.Link{
		{"Builder", client.BuilderURL(masterName, builderName)},
		{"Last build", client.BuildURL(masterName, builderName, lastBuild.Number)},
		{"Last build step", client.StepURL(masterName, builderName, lastStep, lastBuild.Number)},
	}

	switch b.State {
	case messages.StateBuilding:
		if elapsed > a.HungBuilderThresh && lastStep != StepCompletedRun {
			alerts = append(alerts, messages.Alert{
				Key:      fmt.Sprintf("%s.%s.hung", masterName, builderName),
				Title:    fmt.Sprintf("%s.%s is hung in step %s.", masterName, builderName, lastStep),
				Body:     fmt.Sprintf("%s.%s has been building for %v (last step update %s), past the alerting threshold of %v", masterName, builderName, elapsed, lastUpdated.Time(), a.HungBuilderThresh),
				Severity: hungBuilderSev,
				Time:     messages.TimeToEpochTime(a.Now()),
				Links:    links,
			})
			// Note, just because it's building doesn't mean it's in a good state. If the last N builds
			// all failed (for some large N) then this might still be alertable.
		}
	case messages.StateOffline:
		if elapsed > a.OfflineBuilderThresh {
			alerts = append(alerts, messages.Alert{
				Key:      fmt.Sprintf("%s.%s.offline", masterName, builderName),
				Title:    fmt.Sprintf("%s.%s is offline.", masterName, builderName),
				Body:     fmt.Sprintf("%s.%s has been offline for %v (last step update %s %v), past the alerting threshold of %v", masterName, builderName, elapsed, lastUpdated.Time(), float64(lastUpdated), a.OfflineBuilderThresh),
				Severity: offlineBuilderSev,
				Time:     messages.TimeToEpochTime(a.Now()),
				Links:    links,
			})
		}
	case messages.StateIdle:
		if b.PendingBuilds > a.IdleBuilderCountThresh {
			alerts = append(alerts, messages.Alert{
				Key:      fmt.Sprintf("%s.%s.idle", masterName, builderName),
				Title:    fmt.Sprintf("%s.%s is idle with too many pending builds.", masterName, builderName),
				Body:     fmt.Sprintf("%s.%s is idle with %d pending builds, past the alerting threshold of %d", masterName, builderName, b.PendingBuilds, a.IdleBuilderCountThresh),
				Severity: idleBuilderSev,
				Time:     messages.TimeToEpochTime(a.Now()),
				Links:    links,
			})
		}
	default:
		log.Errorf("Unknown %s.%s builder state: %s", masterName, builderName, b.State)
	}

	// Check for alerts on the most recent complete build
	log.Infof("Checking %d most recent builds for alertable step failures: %s/%s", len(recentBuildIDs), masterName, builderName)
	as, es := a.builderStepAlerts(masterName, builderName, []int64{lastCompletedBuild.Number})

	if len(as) > 0 {
		mostRecentComplete := 0
		for i, id := range recentBuildIDs {
			if id == lastCompletedBuild.Number {
				mostRecentComplete = i
			}
		}
		as, es = a.builderStepAlerts(masterName, builderName, recentBuildIDs[mostRecentComplete:])
		alerts = append(alerts, as...)
		errs = append(errs, es...)
	}

	return alerts, errs
}