Example #1
0
func monitorETCD(etcdAdapter *etcdstoreadapter.ETCDStoreAdapter) {
	out, err := os.Create(filepath.Join(outDir, "etcdstats.log"))
	if err != nil {
		logger.Fatal("etcd.log.creation.failure", err)
	}

	cleanup.Register(func() {
		out.Sync()
	})

	go func() {
		ticker := time.NewTicker(time.Second)
		for {
			<-ticker.C
			t := time.Now()
			logger.Info("fetch.etcd.runonce.data")
			runOnceNodes, err := etcdAdapter.ListRecursively(Bbs.TaskSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.runOnceNodes.error", err)
			}

			executorNode, err := etcdAdapter.ListRecursively(Bbs.ExecutorSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.executorNode.error", err)
			}
			readTime := time.Since(t)

			d := etcdData{
				Time:              float64(time.Now().UnixNano()) / 1e9,
				RunningByExecutor: map[string]int{},
				PresentExecutors:  len(executorNode.ChildNodes),
				ReadTime:          float64(readTime) / 1e9,
			}

			for _, node := range runOnceNodes.ChildNodes {
				runOnce, err := models.NewTaskFromJSON(node.Value)
				if err != nil {
					logger.Error("etcd.decode.runonce", err)
					continue
				}

				switch runOnce.State {
				case models.TaskStatePending:
					d.Pending++
				case models.TaskStateClaimed:
					d.Claimed++
				case models.TaskStateRunning:
					d.Running++
					d.RunningByExecutor[runOnce.ExecutorID]++
				case models.TaskStateCompleted:
					d.Completed++
				}
			}

			logger.Info("fetched.etcd.runonce.data", time.Since(t), d.String())
			out.Write(d.toJson())
			out.Write([]byte("\n"))
		}
	}()
}
Example #2
0
func (h *Handler) syncTable(etcd *etcdstoreadapter.ETCDStoreAdapter, syncInterval time.Duration) {
	for {
		allNodes, _ := etcd.ListRecursively("/v1/routes")

		newTable := map[string]Route{}

		fanouts, _ := allNodes.Lookup("fanout")
		roundRobins, _ := allNodes.Lookup("round-robin")

		for _, host := range fanouts.ChildNodes {
			if len(host.ChildNodes) == 0 {
				continue
			}

			hostname := path.Base(host.Key)

			route := newTable[hostname]
			route.Dispatch = Fanout

			for _, endpoint := range host.ChildNodes {
				route.Endpoints = append(
					route.Endpoints,
					&Endpoint{Addr: path.Base(endpoint.Key)},
				)

				log.Println("registering", hostname, endpoint.Key)
			}

			newTable[hostname] = route
		}

		for _, host := range roundRobins.ChildNodes {
			if len(host.ChildNodes) == 0 {
				continue
			}

			hostname := path.Base(host.Key)

			route := newTable[hostname]
			route.Dispatch = RoundRobin

			for _, endpoint := range host.ChildNodes {
				route.Endpoints = append(
					route.Endpoints,
					&Endpoint{Addr: path.Base(endpoint.Key)},
				)

				log.Println("registering", hostname, endpoint.Key)
			}

			newTable[hostname] = route
		}

		h.Lock()
		h.table = newTable
		h.Unlock()

		time.Sleep(syncInterval)
	}
}
Example #3
0
func registerHandler(etcdAdapter *etcdstoreadapter.ETCDStoreAdapter, addr string, ready chan<- bool) error {
	node := storeadapter.StoreNode{
		Key: "/v1/routes/round-robin/executor/" + addr,
		TTL: 60,
	}

	status, clearNode, err := etcdAdapter.MaintainNode(node)
	if err != nil {
		return err
	}

	tasks.Add(1)

	go func() {
		for {
			select {
			case locked, ok := <-status:
				if locked && ready != nil {
					ready <- true
					ready = nil
				}

				if !locked && ok {
					tasks.Done()
					logger.Fatal("maintain.route.fatal", map[string]interface{}{})
				}

				if !ok {
					tasks.Done()
					return
				}

			case <-stop:
				close(clearNode)

				for _ = range status {
				}

				tasks.Done()

				return
			}
		}
	}()

	return nil
}
func EmitRunOnceStates(datadogClient *datadog.Client, store *etcdstoreadapter.ETCDStoreAdapter, etcdMachines []string) {
	for {
		now := time.Now().Unix()
		all, err := store.ListRecursively("/v1/run_once")
		if err != nil {
			log.Println("failed to get all RunOnces:", err)
			time.Sleep(1 * time.Second)
			continue
		}

		metrics := []datadog.Metric{}

		for i, etcdMachine := range etcdMachines {
			stats := map[string]int{}
			resp, err := http.Get(urljoiner.Join(etcdMachine, "/v2/stats/store"))
			if err != nil {
				log.Println("failed to fetch stats:", err)
				continue
			}
			data, _ := ioutil.ReadAll(resp.Body)
			resp.Body.Close()

			json.Unmarshal(data, &stats)

			metrics = append(metrics, datadog.Metric{
				Metric: fmt.Sprintf("etcd_watchers_%d", i),
				Points: []datadog.DataPoint{
					datadog.DataPoint(
						[2]float64{
							float64(now),
							float64(stats["watchers"]),
						},
					),
				},
			})
		}

		for _, state := range []string{"pending", "claimed", "running", "completed", "resolving"} {
			runOnces, found := all.Lookup(state)
			if !found {
				log.Println("failed to find RunOnces in", state, "state")
				time.Sleep(1 * time.Second)
				continue
			}

			metrics = append(metrics, datadog.Metric{
				Metric: "diego_runonce_" + state,
				Points: []datadog.DataPoint{
					datadog.DataPoint(
						[2]float64{
							float64(now),
							float64(len(runOnces.ChildNodes)),
						},
					),
				},
			})
		}

		executors, err := store.ListRecursively("/v1/executor")
		if err != nil {
			log.Println("failed to get all Executors:", err)
			time.Sleep(1 * time.Second)
			continue
		}

		metrics = append(metrics, datadog.Metric{
			Metric: "executors_maintaining_presence",
			Points: []datadog.DataPoint{
				datadog.DataPoint(
					[2]float64{
						float64(now),
						float64(len(executors.ChildNodes)),
					},
				),
			},
		})

		err = datadogClient.PostMetrics(metrics)
		if err != nil {
			log.Println("failed to post metrics:", err)
		}

		time.Sleep(1 * time.Second)
	}
}