Exemple #1
0
func buildsJsonHandler(w http.ResponseWriter, r *http.Request) {
	defer timer.New("buildsHandler").Stop()
	w.Header().Set("Content-Type", "application/json")

	start, err := getIntParam("start", r)
	if err != nil {
		util.ReportError(w, r, err, fmt.Sprintf("Invalid value for parameter \"start\": %v", err))
		return
	}
	end, err := getIntParam("end", r)
	if err != nil {
		util.ReportError(w, r, err, fmt.Sprintf("Invalid value for parameter \"end\": %v", err))
		return
	}

	var startTime time.Time
	var endTime time.Time

	if end == nil {
		endTime = time.Now()
	} else {
		endTime = time.Unix(int64(*end), 0)
	}

	if start == nil {
		startTime = endTime.AddDate(0, 0, -1)
	} else {
		startTime = time.Unix(int64(*start), 0)
	}

	// Fetch the builds.
	builds, err := buildbot.GetBuildsFromDateRange(startTime, endTime)
	if err != nil {
		util.ReportError(w, r, err, fmt.Sprintf("Failed to load builds: %v", err))
		return
	}
	// Shrink the builds.
	// TODO(borenet): Can we share build-shrinking code with the main status
	// page?

	// TinyBuildStep is a struct containing a small subset of a BuildStep's fields.
	type TinyBuildStep struct {
		Name     string
		Started  float64
		Finished float64
		Results  int
	}

	// TinyBuild is a struct containing a small subset of a Build's fields.
	type TinyBuild struct {
		Builder    string
		BuildSlave string
		Master     string
		Number     int
		Properties [][]interface{} `json:"properties"`
		Started    float64
		Finished   float64
		Results    int
		Steps      []*TinyBuildStep
	}

	rv := make([]*TinyBuild, 0, len(builds))
	for _, b := range builds {
		steps := make([]*TinyBuildStep, 0, len(b.Steps))
		for _, s := range b.Steps {
			steps = append(steps, &TinyBuildStep{
				Name:     s.Name,
				Started:  s.Started,
				Finished: s.Finished,
				Results:  s.Results,
			})
		}
		rv = append(rv, &TinyBuild{
			Builder:    b.Builder,
			BuildSlave: b.BuildSlave,
			Master:     b.Master,
			Number:     b.Number,
			Properties: b.Properties,
			Started:    b.Started,
			Finished:   b.Finished,
			Results:    b.Results,
			Steps:      steps,
		})
	}

	defer timer.New("buildsHandler_encode").Stop()
	if err := json.NewEncoder(w).Encode(rv); err != nil {
		glog.Errorf("Failed to write or encode output: %s", err)
		return
	}
}
Exemple #2
0
func StartAlertRoutines(am *alerting.AlertManager, tickInterval time.Duration, c *influxdb.Client) {
	emailAction, err := alerting.ParseAction("Email([email protected])")
	if err != nil {
		glog.Fatal(err)
	}
	actions := []alerting.Action{emailAction}

	// Disconnected buildslaves.
	go func() {
		seriesTmpl := "buildbot.buildslaves.%s.connected"
		re := regexp.MustCompile("[^A-Za-z0-9]+")
		for _ = range time.Tick(tickInterval) {
			glog.Info("Loading buildslave data.")
			slaves, err := buildbot.GetBuildSlaves()
			if err != nil {
				glog.Error(err)
				continue
			}
			for masterName, m := range slaves {
				for _, s := range m {
					if util.In(s.Name, BUILDSLAVE_OFFLINE_BLACKLIST) {
						continue
					}
					v := int64(0)
					if s.Connected {
						v = int64(1)
					}
					metric := fmt.Sprintf(seriesTmpl, re.ReplaceAllString(s.Name, "_"))
					metrics.GetOrRegisterGauge(metric, metrics.DefaultRegistry).Update(v)
					if !s.Connected {
						// This buildslave is offline. Figure out which one it is.
						if err := am.AddAlert(&alerting.Alert{
							Name:        fmt.Sprintf("Buildslave %s offline", s.Name),
							Category:    alerting.INFRA_ALERT,
							Message:     fmt.Sprintf(BUILDSLAVE_OFFLINE, s.Name, masterName, s.Name, s.Name, s.Name),
							Nag:         int64(time.Hour),
							AutoDismiss: int64(2 * tickInterval),
							Actions:     actions,
						}); err != nil {
							glog.Error(err)
						}
					}
				}
			}
		}
	}()

	// AutoRoll failure.
	go func() {
		getDepsRollStatus := func() (*autoroller.AutoRollStatus, error) {
			resp, err := http.Get(autoroll.AUTOROLL_STATUS_URL)
			if err != nil {
				return nil, err
			}

			defer util.Close(resp.Body)
			var status autoroller.AutoRollStatus
			if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
				return nil, err
			}
			return &status, nil
		}

		for _ = range time.Tick(time.Minute) {
			glog.Infof("Searching for DEPS rolls.")
			status, err := getDepsRollStatus()
			if err != nil {
				util.LogErr(fmt.Errorf("Failed to search for DEPS rolls: %v", err))
				continue
			}

			activeAlert := am.ActiveAlert(AUTOROLL_ALERT_NAME)
			if status.LastRoll != nil {
				if status.LastRoll.Closed {
					if status.LastRoll.Succeeded() {
						if activeAlert != 0 {
							msg := fmt.Sprintf("Subsequent roll succeeded: %s/%d", autoroll.RIETVELD_URL, status.LastRoll.Issue)
							if err := am.Dismiss(activeAlert, alerting.USER_ALERTSERVER, msg); err != nil {
								util.LogErr(err)
							}
						}
					} else if status.LastRoll.Failed() {
						if err := am.AddAlert(&alerting.Alert{
							Name:    AUTOROLL_ALERT_NAME,
							Message: fmt.Sprintf("DEPS roll failed: %s/%d", autoroll.RIETVELD_URL, status.LastRoll.Issue),
							Nag:     int64(3 * time.Hour),
							Actions: actions,
						}); err != nil {
							util.LogErr(err)
						}
					}
				}
			}
		}
	}()

	// Android device disconnects, hung buildslaves.
	go func() {
		// These builders are frequently slow. Ignore them when looking for hung buildslaves.
		hungSlavesIgnore := []string{
			"Housekeeper-Nightly-RecreateSKPs_Canary",
			"Housekeeper-Weekly-RecreateSKPs",
			"Linux Builder",
			"Mac Builder",
			"Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-Valgrind",
			"Test-Ubuntu-GCC-ShuttleA-GPU-GTX550Ti-x86_64-Release-Valgrind",
			"Win Builder",
		}
		hangTimePeriod := 3 * time.Hour
		for _ = range time.Tick(tickInterval) {
			glog.Infof("Searching for hung buildslaves and disconnected Android devices.")
			builds, err := buildbot.GetUnfinishedBuilds()
			if err != nil {
				glog.Error(err)
				continue
			}
			for _, b := range builds {
				// Disconnected Android device?
				disconnectedAndroid := false
				if strings.Contains(b.Builder, "Android") && !strings.Contains(b.Builder, "Build") {
					for _, s := range b.Steps {
						if strings.Contains(s.Name, "wait for device") {
							// If "wait for device" has been running for 10 minutes, the device is probably offline.
							if s.Finished == 0 && time.Since(time.Unix(int64(s.Started), 0)) > 10*time.Minute {
								if err := am.AddAlert(&alerting.Alert{
									Name:     fmt.Sprintf("Android device disconnected (%s)", b.BuildSlave),
									Category: alerting.INFRA_ALERT,
									Message:  fmt.Sprintf(ANDROID_DISCONNECT, b.BuildSlave, b.Master, b.Builder, b.Number, b.BuildSlave, b.BuildSlave),
									Nag:      int64(3 * time.Hour),
									Actions:  actions,
								}); err != nil {
									glog.Error(err)
								}
								disconnectedAndroid = true
							}
						}
					}
				}
				if !disconnectedAndroid && !util.ContainsAny(b.Builder, hungSlavesIgnore) {
					// Hung buildslave?
					for _, s := range b.Steps {
						if s.Name == "steps" {
							continue
						}
						// If the step has been running for over an hour, it's probably hung.
						if s.Finished == 0 && time.Since(time.Unix(int64(s.Started), 0)) > hangTimePeriod {
							if err := am.AddAlert(&alerting.Alert{
								Name:        fmt.Sprintf("Possibly hung buildslave (%s)", b.BuildSlave),
								Category:    alerting.INFRA_ALERT,
								Message:     fmt.Sprintf(HUNG_BUILDSLAVE, b.BuildSlave, hangTimePeriod.String(), b.Master, b.Builder, b.Number, b.BuildSlave, b.BuildSlave),
								Nag:         int64(time.Hour),
								Actions:     actions,
								AutoDismiss: int64(10 * tickInterval),
							}); err != nil {
								glog.Error(err)
							}
						}
					}
				}
			}
		}
	}()

	// Failed update_scripts.
	go func() {
		lastSearch := time.Now()
		for _ = range time.Tick(tickInterval) {
			glog.Infof("Searching for builds which failed update_scripts.")
			currentSearch := time.Now()
			builds, err := buildbot.GetBuildsFromDateRange(lastSearch, currentSearch)
			lastSearch = currentSearch
			if err != nil {
				glog.Error(err)
				continue
			}
			for _, b := range builds {
				for _, s := range b.Steps {
					if s.Name == "update_scripts" {
						if s.Results != 0 {
							if err := am.AddAlert(&alerting.Alert{
								Name:     "update_scripts failed",
								Category: alerting.INFRA_ALERT,
								Message:  fmt.Sprintf(UPDATE_SCRIPTS, b.Builder, b.Master, b.Builder, b.Number, b.Builder, b.BuildSlave),
								Actions:  actions,
							}); err != nil {
								glog.Error(err)
							}
						}
						break
					}
				}
			}
		}
	}()
}