Example #1
// MonitorPodHealth is meant to be a long-running goroutine. It reads
// from a consul store to determine which services should be running
// on the host, runs a CheckHealth routine to monitor the health of
// each service, and kills the routines for services that should no
// longer be running.
func MonitorPodHealth(config *preparer.PreparerConfig, logger *logging.Logger, shutdownCh chan struct{}) {
	client, err := config.GetConsulClient()
	if err != nil {
		// A bad config should have already produced a nice, user-friendly error message.
		logger.WithError(err).Fatalln("error creating health monitor KV client")
	}
	store := kp.NewConsulStore(client)
	healthManager := store.NewHealthManager(config.NodeName, *logger)

	node := config.NodeName
	pods := []PodWatch{}

	watchQuitCh := make(chan struct{})
	watchErrCh := make(chan error)
	watchPodCh := make(chan []kp.ManifestResult)
	go store.WatchPods(
		kp.REALITY_TREE,
		node,
		watchQuitCh,
		watchErrCh,
		watchPodCh,
	)

	// If GetClient fails it means the certfile/keyfile/cafile were
	// invalid or did not exist, so a fatal error is appropriate.
	secureClient, err := config.GetClient(time.Duration(*HEALTHCHECK_TIMEOUT) * time.Second)
	if err != nil {
		logger.WithError(err).Fatalln("failed to get http client for this preparer")
	}

	insecureClient, err := config.GetInsecureClient(time.Duration(*HEALTHCHECK_TIMEOUT) * time.Second)
	if err != nil {
		logger.WithError(err).Fatalln("failed to get http client for this preparer")
	}

	for {
		select {
		case results := <-watchPodCh:
			// updatePods checks whether pods have been added or removed:
			// it starts a monitor routine for each new pod and kills the
			// monitor routine for each removed pod.
			pods = updatePods(healthManager, secureClient, insecureClient, pods, results, node, logger)
		case err := <-watchErrCh:
			logger.WithError(err).Errorln("there was an error reading reality manifests for health monitor")
		case <-shutdownCh:
			for _, pod := range pods {
				pod.shutdownCh <- true
			}
			close(watchQuitCh)
			healthManager.Close()
			return
		}
	}
}
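
For context, here is a minimal sketch of how MonitorPodHealth might be launched and shut down from a preparer binary. The loadConfig helper, the signal handling, and the import paths (which assume the square/p2 package layout) are assumptions for illustration; only the MonitorPodHealth signature comes from the example above.

package main

import (
	"os"
	"os/signal"
	"syscall"

	"github.com/square/p2/pkg/logging"
	"github.com/square/p2/pkg/preparer"
)

// loadConfig is a hypothetical placeholder for however the preparer
// config and logger are actually built in a real deployment.
func loadConfig() (*preparer.PreparerConfig, *logging.Logger) {
	// ... parse flags, read the preparer config file, etc. ...
	return nil, nil // placeholder; a real implementation returns a populated config
}

func main() {
	config, logger := loadConfig()

	// MonitorPodHealth blocks until shutdownCh is signaled, so it is
	// launched as its own goroutine.
	shutdownCh := make(chan struct{})
	go MonitorPodHealth(config, logger, shutdownCh)

	// Closing shutdownCh on SIGINT/SIGTERM lets the monitor stop its
	// per-pod routines and its reality-tree watch before exiting.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	<-sigCh
	close(shutdownCh)
	// A real binary would wait here for the monitor to finish its
	// cleanup; that synchronization is omitted for brevity.
}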
Example #2
func verifyHealthChecks(config *preparer.PreparerConfig, services []string) error {
	client, err := config.GetConsulClient()
	if err != nil {
		return err
	}
	store := kp.NewConsulStore(client)

	// Give the health monitor time to write its first results, then
	// check consul for health information for each app.
	time.Sleep(30 * time.Second)
	name, err := os.Hostname()
	if err != nil {
		return err
	}

	node := types.NodeName(name)
	for _, sv := range services {
		res, err := store.GetHealth(sv, node)
		if err != nil {
			return err
		} else if (res == kp.WatchResult{}) {
			return fmt.Errorf("No results for %s: \n\n %s", sv, targetLogs())
		} else if res.Status != string(health.Passing) {
			return fmt.Errorf("%s did not pass health check: \n\n %s", sv, targetLogs())
		} else {
			fmt.Println(res)
		}
	}

	// Cross-check: the service-wide view from GetServiceHealth should
	// agree with the per-node result from GetHealth for this node.
	for _, sv := range services {
		res, err := store.GetServiceHealth(sv)
		if err != nil {
			return err
		}
		getres, err := store.GetHealth(sv, node)
		if err != nil {
			return err
		}
		val := res[kp.HealthPath(sv, node)]
		if getres.Id != val.Id || getres.Service != val.Service || getres.Status != val.Status {
			return fmt.Errorf("GetServiceHealth failed %+v: \n\n%s", res, targetLogs())
		}
	}

	// Reaching this point means health checks are being written to
	// the KV store properly.
	return nil
}
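
And a sketch of how verifyHealthChecks might be invoked from an integration harness once pods are deployed. The checkHealthWritten wrapper and the "hello" service name are hypothetical; only verifyHealthChecks itself comes from the example above, and the snippet assumes it lives in the same package with "log" and the p2 preparer package imported.

// checkHealthWritten fails the integration run if no passing health
// results appear in consul for the deployed services.
func checkHealthWritten(config *preparer.PreparerConfig) {
	// "hello" stands in for whatever services the harness deployed.
	if err := verifyHealthChecks(config, []string{"hello"}); err != nil {
		log.Fatalf("health checks were not written to consul: %v", err)
	}
}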