func ServeAPI(l logger.Logger, conf *config.Config) { store := connectToStore(l, conf) apiHandler, err := handlers.New(l, store, buildTimeProvider(l)) if err != nil { l.Error("initialize-handler.failed", err) panic(err) } handler := handlers.BasicAuthWrap(apiHandler, conf.APIServerUsername, conf.APIServerPassword) listenAddr := fmt.Sprintf("%s:%d", conf.APIServerAddress, conf.APIServerPort) members := grouper.Members{ {"api", http_server.New(listenAddr, handler)}, } group := grouper.NewOrdered(os.Interrupt, members) monitor := ifrit.Invoke(sigmon.New(group)) l.Info("started") l.Info(listenAddr) err = <-monitor.Wait() if err != nil { l.Error("exited", err) os.Exit(1) } l.Info("exited") os.Exit(0) }
func connectToMessageBus(l logger.Logger, conf *config.Config) yagnats.NATSClient { members := []yagnats.ConnectionProvider{} for _, natsConf := range conf.NATS { members = append(members, &yagnats.ConnectionInfo{ Addr: fmt.Sprintf("%s:%d", natsConf.Host, natsConf.Port), Username: natsConf.User, Password: natsConf.Password, }) } connectionInfo := &yagnats.ConnectionCluster{ Members: members, } natsClient := yagnats.NewClient() err := natsClient.Connect(connectionInfo) if err != nil { l.Error("Failed to connect to the message bus", err) os.Exit(1) } return natsClient }
func ServeMetrics(steno *gosteno.Logger, l logger.Logger, conf *config.Config) { store := connectToStore(l, conf) messageBus := connectToMessageBus(l, conf) acquireLock(l, conf, "metrics-server") collectorRegistrar := collectorregistrar.NewCollectorRegistrar(messageBus, steno) metricsServer := metricsserver.New( collectorRegistrar, steno, metricsaccountant.New(store), l, store, buildTimeProvider(l), conf, ) err := metricsServer.Start() if err != nil { l.Error("Failed to serve metrics", err) } l.Info("Serving Metrics") select {} }
func connectToStore(l logger.Logger, conf *config.Config) (store.Store, metricsaccountant.UsageTracker) { if conf.StoreType == "etcd" || conf.StoreType == "ZooKeeper" { adapter, workerPool := connectToStoreAdapter(l, conf) return store.NewStore(conf, adapter, l), workerPool } else { l.Error(fmt.Sprintf("Unknown store type %s. Choose one of 'etcd' or 'ZooKeeper'", conf.StoreType), fmt.Errorf("Unkown store type")) os.Exit(1) } return nil, nil }
func buildClock(l logger.Logger) clock.Clock { if os.Getenv("HM9000_FAKE_TIME") == "" { return clock.NewClock() } else { timestamp, err := strconv.Atoi(os.Getenv("HM9000_FAKE_TIME")) if err != nil { l.Error("Failed to load timestamp", err) os.Exit(1) } return NewFixedClock(time.Unix(int64(timestamp), 0)) } }
func connectToStoreAdapter(l logger.Logger, conf *config.Config) (storeadapter.StoreAdapter, metricsaccountant.UsageTracker) { var adapter storeadapter.StoreAdapter workerPool := workerpool.NewWorkerPool(conf.StoreMaxConcurrentRequests) adapter = etcdstoreadapter.NewETCDStoreAdapter(conf.StoreURLs, workerPool) err := adapter.Connect() if err != nil { l.Error("Failed to connect to the store", err) os.Exit(1) } return adapter, workerPool }
func StartEvacuator(l logger.Logger, conf *config.Config) { messageBus := connectToMessageBus(l, conf) store, _ := connectToStore(l, conf) acquireLock(l, conf, "evacuator") evacuator := evacuatorpackage.New(messageBus, store, buildTimeProvider(l), conf, l) evacuator.Listen() l.Info("Listening for DEA Evacuations") select {} }
func acquireLock(l logger.Logger, conf *config.Config, lockName string) { adapter, _ := connectToStoreAdapter(l, conf) l.Info("Acquiring lock for " + lockName) lock := storeadapter.StoreNode{ Key: "/hm/locks/" + lockName, TTL: 10, } status, _, err := adapter.MaintainNode(lock) if err != nil { l.Error("Failed to talk to lock store", err) os.Exit(1) } lockAcquired := make(chan bool) go func() { for { if <-status { if lockAcquired != nil { close(lockAcquired) lockAcquired = nil } } else { l.Error("Lost the lock", errors.New("Lost the lock")) os.Exit(197) } } }() <-lockAcquired l.Info("Acquired lock for " + lockName) }
func buildTimeProvider(l logger.Logger) timeprovider.TimeProvider { if os.Getenv("HM9000_FAKE_TIME") == "" { return timeprovider.NewTimeProvider() } else { timestamp, err := strconv.Atoi(os.Getenv("HM9000_FAKE_TIME")) if err != nil { l.Error("Failed to load timestamp", err) os.Exit(1) } return &faketimeprovider.FakeTimeProvider{ TimeToProvide: time.Unix(int64(timestamp), 0), } } }
func StartListeningForActual(l logger.Logger, c *cli.Context) { conf := loadConfig(l, c) messageBus := connectToMessageBus(l, conf) store := connectToStore(l, conf) listener := actualstatelistener.New(conf, messageBus, store, timeprovider.NewTimeProvider(), l) listener.Start() l.Info("Listening for Actual State", nil) select {} }
func connectToStoreAdapter(l logger.Logger, conf *config.Config, usage *usageTracker) storeadapter.StoreAdapter { var adapter storeadapter.StoreAdapter var around workpool.AroundWork = workpool.DefaultAround if usage != nil { around = usage } workPool := workpool.New(conf.StoreMaxConcurrentRequests, 0, around) adapter = etcdstoreadapter.NewETCDStoreAdapter(conf.StoreURLs, workPool) err := adapter.Connect() if err != nil { l.Error("Failed to connect to the store", err) os.Exit(1) } return adapter }
func connectToStoreAdapter(l logger.Logger, conf *config.Config) (storeadapter.StoreAdapter, metricsaccountant.UsageTracker) { var adapter storeadapter.StoreAdapter workerPool := workerpool.NewWorkerPool(conf.StoreMaxConcurrentRequests) if conf.StoreType == "etcd" { adapter = etcdstoreadapter.NewETCDStoreAdapter(conf.StoreURLs, workerPool) } else if conf.StoreType == "ZooKeeper" { adapter = zookeeperstoreadapter.NewZookeeperStoreAdapter(conf.StoreURLs, workerPool, buildTimeProvider(l), time.Second) } else { l.Error(fmt.Sprintf("Unknown store type %s. Choose one of 'etcd' or 'ZooKeeper'", conf.StoreType), fmt.Errorf("Unkown store type")) os.Exit(1) } err := adapter.Connect() if err != nil { l.Error("Failed to connect to the store", err) os.Exit(1) } return adapter, workerPool }
func StartListeningForActual(l logger.Logger, conf *config.Config) { messageBus := connectToMessageBus(l, conf) store, usageTracker := connectToStoreAndTrack(l, conf) acquireLock(l, conf, "listener") listener := actualstatelistener.New(conf, messageBus, store, usageTracker, metricsaccountant.New(store), buildClock(l), l, ) listener.Start() l.Info("Listening for Actual State") select {} }
func ServeAPI(l logger.Logger, conf *config.Config) { store, _ := connectToStore(l, conf) messageBus := connectToMessageBus(l, conf) //no locking necessary for the api server. it's ok to have multiples of these running. //NATS will distribute the requests and ensure that only one api-server handles a given request //because we use a NATS queue. apiServer := apiserver.New( messageBus, store, buildTimeProvider(l), l, ) apiServer.Listen() l.Info(fmt.Sprintf("Serving API over NATS (subject: app.state)")) select {} }
func Daemonize( component string, callback func() error, period time.Duration, timeout time.Duration, logger logger.Logger, adapter storeadapter.StoreAdapter, ) error { logger.Info("Acquiring lock for " + component) lostLockChannel, releaseLockChannel, err := adapter.GetAndMaintainLock(component, 10) if err != nil { logger.Info(fmt.Sprintf("Failed to acquire lock: %s", err)) return err } go func() { <-lostLockChannel logger.Error("Lost the lock", errors.New("Lock the lock")) os.Exit(197) }() logger.Info("Acquired lock for " + component) logger.Info(fmt.Sprintf("Running Daemon every %d seconds with a timeout of %d", int(period.Seconds()), int(timeout.Seconds()))) for { afterChan := time.After(period) timeoutChan := time.After(timeout) errorChan := make(chan error, 1) t := time.Now() go func() { errorChan <- callback() }() select { case err := <-errorChan: logger.Info("Daemonize Time", map[string]string{ "Component": component, "Duration": fmt.Sprintf("%.4f", time.Since(t).Seconds()), }) if err != nil { logger.Error("Daemon returned an error. Continuining...", err) } case <-timeoutChan: releaseLockChannel <- true return errors.New("Daemon timed out. Aborting!") } <-afterChan } return nil }
func FetchDesiredState(l logger.Logger, conf *config.Config, poll bool) { store, _ := connectToStore(l, conf) if poll { l.Info("Starting Desired State Daemon...") adapter, _ := connectToStoreAdapter(l, conf) err := Daemonize("Fetcher", func() error { return fetchDesiredState(l, conf, store) }, conf.FetcherPollingInterval(), conf.FetcherTimeout(), l, adapter) if err != nil { l.Error("Desired State Daemon Errored", err) } l.Info("Desired State Daemon is Down") os.Exit(1) } else { err := fetchDesiredState(l, conf, store) if err != nil { os.Exit(1) } else { os.Exit(0) } } }
func Analyze(l logger.Logger, conf *config.Config, poll bool) { store := connectToStore(l, conf) if poll { l.Info("Starting Analyze Daemon...") adapter := connectToStoreAdapter(l, conf, nil) err := Daemonize("Analyzer", func() error { return analyze(l, conf, store) }, conf.AnalyzerPollingInterval(), conf.AnalyzerTimeout(), l, adapter) if err != nil { l.Error("Analyze Daemon Errored", err) } l.Info("Analyze Daemon is Down") os.Exit(1) } else { err := analyze(l, conf, store) if err != nil { os.Exit(1) } else { os.Exit(0) } } }
func connectToMessageBus(l logger.Logger, conf *config.Config) yagnats.NATSConn { members := make([]string, len(conf.NATS)) for _, natsConf := range conf.NATS { uri := url.URL{ Scheme: "nats", User: url.UserPassword(natsConf.User, natsConf.Password), Host: fmt.Sprintf("%s:%d", natsConf.Host, natsConf.Port), } members = append(members, uri.String()) } natsClient, err := yagnats.Connect(members) if err != nil { l.Error("Failed to connect to the message bus", err) os.Exit(1) } natsClient.AddReconnectedCB(func(conn *nats.Conn) { l.Info(fmt.Sprintf("NATS Client Reconnected. Server URL: %s", conn.Opts.Url)) }) natsClient.AddClosedCB(func(conn *nats.Conn) { err := errors.New(fmt.Sprintf("NATS Client Closed. nats.Conn: %+v", conn)) l.Error("NATS Closed", err) os.Exit(1) }) return natsClient }
func connectToStoreAdapter(l logger.Logger, conf *config.Config) storeadapter.StoreAdapter { var adapter storeadapter.StoreAdapter workPool, err := workpool.NewWorkPool(conf.StoreMaxConcurrentRequests) if err != nil { l.Error("Failed to create workpool", err) os.Exit(1) } options := &etcdstoreadapter.ETCDOptions{ ClusterUrls: conf.StoreURLs, } adapter, err = etcdstoreadapter.New(options, workPool) if err != nil { l.Error("Failed to create the store adapter", err) os.Exit(1) } err = adapter.Connect() if err != nil { l.Error("Failed to connect to the store", err) os.Exit(1) } return adapter }
func Send(l logger.Logger, conf *config.Config, poll bool) { messageBus := connectToMessageBus(l, conf) store, _ := connectToStore(l, conf) if poll { l.Info("Starting Sender Daemon...") adapter, _ := connectToStoreAdapter(l, conf) err := Daemonize("Sender", func() error { return send(l, conf, messageBus, store) }, conf.SenderPollingInterval(), conf.SenderTimeout(), l, adapter) if err != nil { l.Error("Sender Daemon Errored", err) } l.Info("Sender Daemon is Down") os.Exit(1) } else { err := send(l, conf, messageBus, store) if err != nil { os.Exit(1) } else { os.Exit(0) } } }
func FetchDesiredState(l logger.Logger, c *cli.Context) { conf := loadConfig(l, c) messageBus := connectToMessageBus(l, conf) store := connectToStore(l, conf) fetcher := desiredstatefetcher.New(conf, messageBus, store, httpclient.NewHttpClient(), timeprovider.NewTimeProvider(), ) resultChan := make(chan desiredstatefetcher.DesiredStateFetcherResult, 1) fetcher.Fetch(resultChan) select { case result := <-resultChan: if result.Success { l.Info("Success", map[string]string{"Number of Desired Apps Fetched": strconv.Itoa(result.NumResults)}) os.Exit(0) } else { l.Info(result.Message, map[string]string{"Error": result.Error.Error(), "Message": result.Message}) os.Exit(1) } case <-time.After(600 * time.Second): l.Info("Timed out when fetching desired state", nil) os.Exit(1) } }
func acquireLock(l logger.Logger, conf *config.Config, lockName string) { adapter, _ := connectToStoreAdapter(l, conf) l.Info("Acquiring lock for " + lockName) lostLockChannel, _, err := adapter.GetAndMaintainLock(lockName, 10) if err != nil { l.Error("Failed to talk to lock store", err) os.Exit(1) } go func() { <-lostLockChannel l.Error("Lost the lock", errors.New("Lock the lock")) os.Exit(197) }() l.Info("Acquired lock for " + lockName) }
func analyze(l logger.Logger, conf *config.Config, store store.Store) error { l.Info("Analyzing...") analyzer := analyzer.New(store, buildTimeProvider(l), l, conf) err := analyzer.Analyze() if err != nil { l.Error("Analyzer failed with error", err) return err } else { l.Info("Analyzer completed succesfully") return nil } }
func send(l logger.Logger, conf *config.Config, messageBus yagnats.NATSClient, store store.Store) error { l.Info("Sending...") sender := sender.New(store, metricsaccountant.New(store), conf, messageBus, buildTimeProvider(l), l) err := sender.Send() if err != nil { l.Error("Sender failed with error", err) return err } else { l.Info("Sender completed succesfully") return nil } }
func acquireLock(l logger.Logger, conf *config.Config, lockName string) { adapter, _ := connectToStoreAdapter(l, conf) l.Info("Acquiring lock for " + lockName) lock := storeadapter.StoreNode{ Key: "/hm/locks/" + lockName, TTL: 10, } lostLockChannel, _, err := adapter.MaintainNode(lock) if err != nil { l.Error("Failed to talk to lock store", err) os.Exit(1) } go func() { <-lostLockChannel l.Error("Lost the lock", errors.New("Lock the lock")) os.Exit(197) }() l.Info("Acquired lock for " + lockName) }
func fetchDesiredState(l logger.Logger, conf *config.Config, store store.Store) error { l.Info("Fetching Desired State") fetcher := desiredstatefetcher.New(conf, store, metricsaccountant.New(store), httpclient.NewHttpClient(conf.SkipSSLVerification, conf.FetcherNetworkTimeout()), buildTimeProvider(l), l, ) resultChan := make(chan desiredstatefetcher.DesiredStateFetcherResult, 1) fetcher.Fetch(resultChan) result := <-resultChan if result.Success { l.Info("Success", map[string]string{"Number of Desired Apps Fetched": strconv.Itoa(result.NumResults)}) return nil } else { l.Error(result.Message, result.Error) return result.Error } return nil }
func shred(l logger.Logger, store store.Store) error { l.Info("Shredding Store") theShredder := shredder.New(store) return theShredder.Shred() }
func Daemonize( component string, callback func() error, period time.Duration, timeout time.Duration, logger logger.Logger, adapter storeadapter.StoreAdapter, ) error { logger.Info("Acquiring lock for " + component) lock := storeadapter.StoreNode{ Key: "/hm/locks/" + component, TTL: 10, } status, releaseLockChannel, err := adapter.MaintainNode(lock) if err != nil { logger.Info(fmt.Sprintf("Failed to acquire lock: %s", err)) return err } lockAcquired := make(chan bool) go func() { for { if <-status { if lockAcquired != nil { close(lockAcquired) lockAcquired = nil } } else { logger.Error("Lost the lock", errors.New("Lock the lock")) os.Exit(197) } } }() <-lockAcquired logger.Info("Acquired lock for " + component) logger.Info(fmt.Sprintf("Running Daemon every %d seconds with a timeout of %d", int(period.Seconds()), int(timeout.Seconds()))) for { afterChan := time.After(period) timeoutChan := time.After(timeout) errorChan := make(chan error, 1) t := time.Now() go func() { errorChan <- callback() }() select { case err := <-errorChan: logger.Info("Daemonize Time", map[string]string{ "Component": component, "Duration": fmt.Sprintf("%.4f", time.Since(t).Seconds()), }) if err != nil { logger.Error("Daemon returned an error. Continuining...", err) } case <-timeoutChan: released := make(chan bool) releaseLockChannel <- released <-released return errors.New("Daemon timed out. Aborting!") } <-afterChan } return nil }