コード例 #1
0
// EmitRunOnceStates loops forever, roughly once per second, collecting gauges
// from etcd and posting them to Datadog through datadogClient.
//
// Each round reports:
//   - "etcd_watchers_<i>": the "watchers" value from the /v2/stats/store
//     endpoint of the i-th entry in etcdMachines;
//   - "diego_runonce_<state>": the number of child nodes under each RunOnce
//     state found beneath /v1/run_once in store;
//   - "executors_maintaining_presence": the number of child nodes beneath
//     /v1/executor in store.
//
// Failures are logged and never returned. A machine whose stats cannot be
// fetched or decoded is skipped for that round rather than reported as zero.
// The function does not return.
func EmitRunOnceStates(datadogClient *datadog.Client, store *etcdstoreadapter.ETCDStoreAdapter, etcdMachines []string) {
	for {
		now := time.Now().Unix()

		// gauge builds a single-point metric stamped with this round's time.
		gauge := func(name string, value int) datadog.Metric {
			return datadog.Metric{
				Metric: name,
				Points: []datadog.DataPoint{
					datadog.DataPoint(
						[2]float64{
							float64(now),
							float64(value),
						},
					),
				},
			}
		}

		all, err := store.ListRecursively("/v1/run_once")
		if err != nil {
			log.Println("failed to get all RunOnces:", err)
			time.Sleep(1 * time.Second)
			continue
		}

		metrics := []datadog.Metric{}

		for i, etcdMachine := range etcdMachines {
			resp, err := http.Get(urljoiner.Join(etcdMachine, "/v2/stats/store"))
			if err != nil {
				log.Println("failed to fetch stats:", err)
				continue
			}

			data, err := ioutil.ReadAll(resp.Body)
			resp.Body.Close()
			if err != nil {
				log.Println("failed to read stats:", err)
				continue
			}

			stats := map[string]int{}
			if err := json.Unmarshal(data, &stats); err != nil {
				// Unmarshal may still have decoded some keys (e.g. when only
				// one value has an unexpected type), so keep going and let the
				// lookup below decide whether there is anything to report.
				log.Println("failed to parse stats:", err)
			}

			// Without this check a failed or unexpected response would be
			// reported as zero watchers, which looks like a real reading.
			watchers, found := stats["watchers"]
			if !found {
				log.Println("no watchers count in stats from", etcdMachine)
				continue
			}

			metrics = append(metrics, gauge(fmt.Sprintf("etcd_watchers_%d", i), watchers))
		}

		for _, state := range []string{"pending", "claimed", "running", "completed", "resolving"} {
			runOnces, found := all.Lookup(state)
			if !found {
				log.Println("failed to find RunOnces in", state, "state")
				// NOTE(review): this sleep delays the remaining states (and
				// the post) by a second per missing state; kept as-is because
				// it may be intentional throttling — confirm.
				time.Sleep(1 * time.Second)
				continue
			}

			metrics = append(metrics, gauge("diego_runonce_"+state, len(runOnces.ChildNodes)))
		}

		executors, err := store.ListRecursively("/v1/executor")
		if err != nil {
			log.Println("failed to get all Executors:", err)
			time.Sleep(1 * time.Second)
			continue
		}

		metrics = append(metrics, gauge("executors_maintaining_presence", len(executors.ChildNodes)))

		err = datadogClient.PostMetrics(metrics)
		if err != nil {
			log.Println("failed to post metrics:", err)
		}

		time.Sleep(1 * time.Second)
	}
}
コード例 #2
0
// RunonceStampede launches runOnceCount concurrent createRunOnce calls for
// runOnce and watches bbs for completed RunOnces, resolving each completion
// whose guid has a recorded start time.
//
// If datadogClient is non-nil, a "diego_runonce_stampede_start" event is
// posted immediately and a "diego_runonce_stampede_stop" event, tagged with
// the elapsed duration, is posted when the function returns.
//
// The wait loop ends when the 100 minute overall timeout fires or, once
// runOnceCount completions have been seen, after a single 30 second grace
// period. Before returning, the function waits for all in-flight resolve
// calls and then closes the watch's stop channel.
func RunonceStampede(bbs *bbs.BBS, datadogClient *datadog.Client, runOnce *models.RunOnce, runOnceCount int) {

	completed, stop, errs := bbs.WatchForCompletedRunOnce()

	startAll := time.Now()

	if datadogClient != nil {
		event, err := datadogClient.PostEvent(&datadog.Event{
			Title: "diego_runonce_stampede_start",
			Text:  "started the stampede",
			Tags:  []string{fmt.Sprintf("count:%d", runOnceCount)},
		})

		log.Println("posted start event:", event, err)

		defer func() {
			event, err := datadogClient.PostEvent(&datadog.Event{
				Title: "diego_runonce_stampede_stop",
				Text:  "stopped the stampede",
				Tags: []string{
					fmt.Sprintf("count:%d", runOnceCount),
					fmt.Sprintf("duration:%s", time.Since(startAll)),
				},
			})

			log.Println("posted stop event:", event, err)
		}()
	}

	// Buffered to runOnceCount, one slot per launched createRunOnce call.
	// Presumably each call sends exactly one start time — verify against
	// createRunOnce.
	startTimes := make(chan runOnceTime, runOnceCount)
	go func() {
		for i := 0; i < runOnceCount; i++ {
			go createRunOnce(runOnce, startTimes, bbs)
		}
	}()

	seenRunOnces := 0
	runOnceStartTimes := make(map[string]time.Time)
	waitGroup := &sync.WaitGroup{}

	timer := time.After(100 * time.Minute)
	graceStarted := false

OUTER:
	for {
		// Arm the grace period exactly once. Re-arming it on every iteration
		// would let any later event (a watch error, a completion for an
		// unknown guid) push the deadline out again, so the loop might never
		// end.
		if !graceStarted && seenRunOnces == runOnceCount {
			timer = time.After(30 * time.Second)
			graceStarted = true
		}

		select {
		case startTime := <-startTimes:
			runOnceStartTimes[startTime.guid] = startTime.startTime

		case completedRunOnce := <-completed:
			startedAt, found := runOnceStartTimes[completedRunOnce.Guid]
			if !found {
				// No recorded start time for this guid: ignore it.
				continue
			}

			log.Println("done:", seenRunOnces, RunOnceResult{
				Guid:     completedRunOnce.Guid,
				Duration: time.Since(startedAt),
				Failed:   completedRunOnce.Failed,
			})

			seenRunOnces++
			waitGroup.Add(1)
			// completedRunOnce is scoped to this case clause, so each
			// goroutine captures its own value.
			go func() {
				defer waitGroup.Done()

				log.Println("deleting", completedRunOnce.Guid)
				err := bbs.ResolveRunOnce(completedRunOnce)
				if err != nil {
					log.Println("failed to resolve run once:", completedRunOnce.Guid, err)
					return
				}
				log.Println("deleted:", completedRunOnce.Guid)
			}()

		case err := <-errs:
			log.Println("watch error:", err)

		case <-timer:
			break OUTER
		}
	}

	// Let every in-flight resolve finish before tearing down the watch.
	waitGroup.Wait()

	close(stop)
}