func (ep *EventProcessor) handleEvents(events []consul.Event) { for _, event := range events { log.Println("----------------------------------------") log.Printf("Processing event %s:\n", event.ID) log.Println("----------------------------------------") eventHandlers := consulClient.EventHandlers(event.Name) for _, eventHandler := range eventHandlers { data, err := json.Marshal(&event) if err != nil { log.Println("Unable to read event: ", event) // then what? } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(eventHandler) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running handler: ", err) } else { log.Printf(">>> \n%s -> %s:\n %s\n", event.ID, eventHandler, output) } } log.Printf("Event Processed.\n\n") } }
func processEvent(event consul.Event) { log.Println("----------------------------------------") log.Printf("Processing event %s:\n", event.ID) log.Println("----------------------------------------") eventHandlers := consulClient.EventHandlers(event.Name) for _, eventHandler := range eventHandlers { executeEventHandler(event, eventHandler) } log.Printf("Event Processed.\n\n") }
//Notify sends messages to the endpoint notifier func (notifier *HipChatNotifier) Notify(messages Messages) bool { overallStatus, pass, warn, fail := messages.Summary() text := fmt.Sprintf("%s is <STRONG>%s</STRONG>. Fail: %d, Warn: %d, Pass: %d", notifier.ClusterName, overallStatus, fail, warn, pass) for _, message := range messages { text += fmt.Sprintf("<BR><STRONG><CODE>%s</CODE></STRONG>:%s:%s is <STRONG>%s</STRONG>.", message.Node, html.EscapeString(message.Service), html.EscapeString(message.Check), message.Status) if utf8.RuneCountInString(message.Output) > 0 { text += fmt.Sprintf("<BR>%s", strings.Replace(html.EscapeString(strings.TrimSpace(message.Output)), "\n", "<BR>", -1)) } } level := "green" if fail > 0 { level = "red" } else if warn > 0 { level = "yellow" } client := hipchat.NewClient(notifier.AuthToken) if notifier.BaseURL != "" { url, err := url.Parse(notifier.BaseURL) if err != nil { log.Printf("Error parsing hipchat base url: %s\n", err) } client.BaseURL = url } from := "" if notifier.From != "" { from = notifier.From } notifRq := &hipchat.NotificationRequest{ Color: level, Message: text, Notify: true, MessageFormat: "html", From: from, } resp, err := client.Room.Notification(notifier.RoomId, notifRq) if err != nil { log.Printf("Error sending notification to hipchat: %s\n", err) log.Printf("Server returns %+v\n", resp) return false } return true }
func (c *ConsulAlertClient) registerHealthCheck(key string, health *Check) { log.Printf( "Registering new health check: node=%s, service=%s, check=%s, status=%s", health.Node, health.ServiceName, health.Name, health.Status, ) var newStatus Status if health.Status == "passing" { newStatus = Status{ Current: health.Status, CurrentTimestamp: time.Now(), HealthCheck: health, } } else { newStatus = Status{ Pending: health.Status, PendingTimestamp: time.Now(), HealthCheck: health, } } statusData, _ := json.Marshal(newStatus) c.api.KV().Put(&consulapi.KVPair{Key: key, Value: statusData}, nil) }
func (c *ConsulAlertClient) UpdateCheckData() { healthApi := c.api.Health() kvApi := c.api.KV() healths, _, _ := healthApi.State("any", nil) for _, health := range healths { node := health.Node service := health.ServiceID check := health.CheckID if service == "" { service = "_" } key := fmt.Sprintf("consul-alerts/checks/%s/%s/%s", node, service, check) status, _, _ := kvApi.Get(key, nil) existing := status != nil localHealth := Check(*health) if c.IsBlacklisted(&localHealth) { log.Printf("%s:%s:%s is blacklisted.", node, service, check) return } if !existing { c.registerHealthCheck(key, &localHealth) } else { c.updateHealthCheck(key, &localHealth) } } }
// runWatcher starts "consul watch" for the given watch type and blocks until
// it finishes. The watch is configured to invoke this same executable
// (os.Args[0]) with the arguments "watch <watchType>", and its output is wired
// to this process's stdout and stderr.
//
// If the command fails, the process exits: with the command's exit status
// when it ran and returned non-zero, with 1 when it could not be run at all,
// and with 127 for any other error. On success a completion message is logged
// and the function returns.
func runWatcher(address, datacenter, watchType string) {
	self := os.Args[0]
	watchArgs := []string{
		"watch",
		"-http-addr", address,
		"-datacenter", datacenter,
		"-type", watchType,
		self, "watch", watchType,
	}

	cmd := exec.Command("consul", watchArgs...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	runErr := cmd.Run()
	if runErr == nil {
		log.Printf("Execution complete.")
		return
	}

	var exitCode int
	switch e := runErr.(type) {
	case *exec.ExitError:
		waitStatus, _ := e.Sys().(syscall.WaitStatus)
		exitCode = waitStatus.ExitStatus()
		log.Println("Shutting down watcher --> Exit Code: ", exitCode)
	case *exec.Error:
		exitCode = 1
		log.Println("Shutting down watcher --> Something went wrong running consul watch: ", e.Error())
	default:
		exitCode = 127
		log.Println("Shutting down watcher --> Unknown error: ", e.Error())
	}
	os.Exit(exitCode)
}
func healthHandler(w http.ResponseWriter, r *http.Request) { node := r.URL.Query().Get("node") service := r.URL.Query().Get("service") check := r.URL.Query().Get("check") log.Println(node, service, check) status, output := consulClient.CheckStatus(node, service, check) var code int switch status { case "passing": code = 200 case "warning", "critical": code = 503 default: status = "unknown" code = 404 } log.Printf("health status check result for node=%s,service=%s,check=%s: %d", node, service, check, code) var result string if output == "" { result = "" } else { result = fmt.Sprintf("output: %s\n", output) } body := fmt.Sprintf("status: %s\n%s", status, result) w.WriteHeader(code) w.Write([]byte(body)) }
//Notify sends messages to the endpoint notifier func (notifier *HipChatNotifier) Notify(messages Messages) bool { overallStatus, pass, warn, fail := messages.Summary() text := fmt.Sprintf(header, notifier.ClusterName, overallStatus, fail, warn, pass) for _, message := range messages { text += fmt.Sprintf("\n%s:%s:%s is %s.", message.Node, message.Service, message.Check, message.Status) text += fmt.Sprintf("\n%s", message.Output) } level := "green" if fail > 0 { level = "red" } else if warn > 0 { level = "yellow" } client := hipchat.NewClient(notifier.AuthToken) if notifier.BaseURL != "" { url, err := url.Parse(notifier.BaseURL) if err != nil { log.Printf("Error parsing hipchat base url: %s\n", err) } client.BaseURL = url } from := "" if notifier.From != "" { from = notifier.From } notifRq := &hipchat.NotificationRequest{ From: from, Message: text, Color: level, Notify: true, } resp, err := client.Room.Notification(notifier.RoomId, notifRq) if err != nil { log.Printf("Error sending notification to hipchat: %s\n", err) log.Printf("Server returns %+v\n", resp) return false } return true }
func daemonMode(arguments map[string]interface{}) { addr := arguments["--alert-addr"].(string) url := fmt.Sprintf("http://%s/v1/info", addr) resp, err := http.Get(url) if err == nil && resp.StatusCode == 201 { version := resp.Header.Get("version") resp.Body.Close() log.Printf("consul-alert daemon already running version: %s", version) os.Exit(1) } consulAclToken := arguments["--consul-acl-token"].(string) consulAddr := arguments["--consul-addr"].(string) consulDc := arguments["--consul-dc"].(string) watchChecks := arguments["--watch-checks"].(bool) watchEvents := arguments["--watch-events"].(bool) consulClient, err = consul.NewClient(consulAddr, consulDc, consulAclToken) if err != nil { log.Println("Cluster has no leader or is unreacheable.", err) os.Exit(3) } hostname, _ := os.Hostname() log.Println("Consul ACL Token:", consulAclToken) log.Println("Consul Alerts daemon started") log.Println("Consul Alerts Host:", hostname) log.Println("Consul Agent:", consulAddr) log.Println("Consul Datacenter:", consulDc) leaderCandidate := startLeaderElection(consulAddr, consulDc, consulAclToken) notifEngine := startNotifEngine() if watchChecks { go runWatcher(consulAddr, consulDc, "checks") } if watchEvents { go runWatcher(consulAddr, consulDc, "event") } ep := startEventProcessor() cp := startCheckProcessor(leaderCandidate, notifEngine) http.HandleFunc("/v1/info", infoHandler) http.HandleFunc("/v1/process/events", ep.eventHandler) http.HandleFunc("/v1/process/checks", cp.checkHandler) http.HandleFunc("/v1/health", healthHandler) go http.ListenAndServe(addr, nil) ch := make(chan os.Signal) signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM, os.Interrupt) <-ch cleanup(notifEngine, cp, ep, leaderCandidate) }
func (logNotifier *LogNotifier) Notify(alerts Messages) bool { logrus.Println("logging messages...") logDir := path.Dir(logNotifier.LogFile) err := os.MkdirAll(logDir, os.ModePerm) if err != nil { logrus.Printf("unable to create directory for logfile: %v\n", err) return false } file, err := os.OpenFile(logNotifier.LogFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) if err != nil { logrus.Printf("unable to write to logfile: %v\n", err) return false } logger := log.New(file, "[consul-notifier] ", log.LstdFlags) for _, alert := range alerts { logger.Printf("Node=%s, Service=%s, Check=%s, Status=%s\n", alert.Node, alert.Service, alert.Check, alert.Status) } logrus.Println("Notifications logged.") return true }
func healthWildcardHandler(w http.ResponseWriter, r *http.Request) { node := r.URL.Query().Get("node") service := r.URL.Query().Get("service") check := r.URL.Query().Get("check") status := r.URL.Query().Get("status") alwaysOk := r.URL.Query().Get("alwaysOk") != "" // Always return 200 code, even if failures in data ignoreBlacklist := r.URL.Query().Get("ignoreBlacklist") != "" var statuses []string if status != "" { statuses = strings.Split(status, ",") } log.Printf("Query: node: %v, service: %v, check: %v, status: %v, alwaysOk: %v, ignoreBlacklist: %v", node, service, check, status, alwaysOk, ignoreBlacklist) alerts := consulClient.NewAlertsWithFilter(node, service, check, statuses, ignoreBlacklist) code := 200 if !alwaysOk { var newCode int for _, alert := range alerts { switch alert.Status { case "passing": newCode = 200 case "warning", "critical": newCode = 503 default: status = "unknown" newCode = 404 } if newCode > code { code = newCode } } } body, _ := json.Marshal(alerts) w.WriteHeader(code) w.Write([]byte(body)) }
func executeEventHandler(event consul.Event, eventHandler string) { data, err := json.Marshal(&event) if err != nil { log.Println("Unable to read event: ", event) // then what? } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(eventHandler) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running handler: ", err) } else { log.Printf(">>> \n%s -> %s:\n %s\n", event.ID, eventHandler, output) } }
//Notify sends messages to the endpoint notifier func (pd *PagerDutyNotifier) Notify(messages Messages) bool { client := gopherduty.NewClient(pd.ServiceKey) result := true for _, message := range messages { incidentKey := message.Node if message.ServiceId != "" { incidentKey += ":" + message.ServiceId } incidentKey += ":" + message.CheckId var response *gopherduty.PagerDutyResponse switch { case message.IsPassing(): description := incidentKey + " is now HEALTHY" response = client.Resolve(incidentKey, description, message) case message.IsWarning(): description := incidentKey + " is UNSTABLE" response = client.Trigger(incidentKey, description, pd.ClientName, pd.ClientUrl, message) case message.IsCritical(): description := incidentKey + " is CRITICAL" response = client.Trigger(incidentKey, description, pd.ClientName, pd.ClientUrl, message) } if response.HasErrors() { for _, err := range response.Errors { log.Printf("Error sending %s notification to pagerduty: %s\n", incidentKey, err) } result = false } } log.Println("PagerDuty notification complete") return result }
func (c *ConsulAlertClient) LoadConfig() { if kvPairs, _, err := c.api.KV().List("consul-alerts/config", nil); err == nil { config := c.config for _, kvPair := range kvPairs { key := kvPair.Key val := kvPair.Value var valErr error switch key { // checks config case "consul-alerts/config/checks/enabled": valErr = loadCustomValue(&config.Checks.Enabled, val, ConfigTypeBool) case "consul-alerts/config/checks/change-threshold": valErr = loadCustomValue(&config.Checks.ChangeThreshold, val, ConfigTypeInt) // events config case "consul-alerts/config/events/enabled": valErr = loadCustomValue(&config.Events.Enabled, val, ConfigTypeBool) case "consul-alerts/config/events/handlers": valErr = loadCustomValue(&config.Events.Handlers, val, ConfigTypeStrArray) // notifiers config case "consul-alerts/config/notifiers/custom": valErr = loadCustomValue(&config.Notifiers.Custom, val, ConfigTypeStrArray) // email notifier config case "consul-alerts/config/notifiers/email/cluster-name": valErr = loadCustomValue(&config.Notifiers.Email.ClusterName, val, ConfigTypeString) case "consul-alerts/config/notifiers/email/template": valErr = loadCustomValue(&config.Notifiers.Email.Template, val, ConfigTypeString) case "consul-alerts/config/notifiers/email/enabled": valErr = loadCustomValue(&config.Notifiers.Email.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/email/password": valErr = loadCustomValue(&config.Notifiers.Email.Password, val, ConfigTypeString) case "consul-alerts/config/notifiers/email/port": valErr = loadCustomValue(&config.Notifiers.Email.Port, val, ConfigTypeInt) case "consul-alerts/config/notifiers/email/receivers": valErr = loadCustomValue(&config.Notifiers.Email.Receivers, val, ConfigTypeStrArray) case "consul-alerts/config/notifiers/email/sender-alias": valErr = loadCustomValue(&config.Notifiers.Email.SenderAlias, val, ConfigTypeString) case "consul-alerts/config/notifiers/email/sender-email": valErr = loadCustomValue(&config.Notifiers.Email.SenderEmail, 
val, ConfigTypeString) case "consul-alerts/config/notifiers/email/url": valErr = loadCustomValue(&config.Notifiers.Email.Url, val, ConfigTypeString) case "consul-alerts/config/notifiers/email/username": valErr = loadCustomValue(&config.Notifiers.Email.Username, val, ConfigTypeString) // log notifier config case "consul-alerts/config/notifiers/log/enabled": valErr = loadCustomValue(&config.Notifiers.Log.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/log/path": valErr = loadCustomValue(&config.Notifiers.Log.Path, val, ConfigTypeString) // influxdb notifier config case "consul-alerts/config/notifiers/influxdb/enabled": valErr = loadCustomValue(&config.Notifiers.Influxdb.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/influxdb/host": valErr = loadCustomValue(&config.Notifiers.Influxdb.Host, val, ConfigTypeString) case "consul-alerts/config/notifiers/influxdb/username": valErr = loadCustomValue(&config.Notifiers.Influxdb.Username, val, ConfigTypeString) case "consul-alerts/config/notifiers/influxdb/password": valErr = loadCustomValue(&config.Notifiers.Influxdb.Password, val, ConfigTypeString) case "consul-alerts/config/notifiers/influxdb/database": valErr = loadCustomValue(&config.Notifiers.Influxdb.Database, val, ConfigTypeString) case "consul-alerts/config/notifiers/influxdb/series-name": valErr = loadCustomValue(&config.Notifiers.Influxdb.SeriesName, val, ConfigTypeString) // slack notfier config case "consul-alerts/config/notifiers/slack/enabled": valErr = loadCustomValue(&config.Notifiers.Slack.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/slack/cluster-name": valErr = loadCustomValue(&config.Notifiers.Slack.ClusterName, val, ConfigTypeString) case "consul-alerts/config/notifiers/slack/url": valErr = loadCustomValue(&config.Notifiers.Slack.Url, val, ConfigTypeString) case "consul-alerts/config/notifiers/slack/channel": valErr = loadCustomValue(&config.Notifiers.Slack.Channel, val, ConfigTypeString) case 
"consul-alerts/config/notifiers/slack/username": valErr = loadCustomValue(&config.Notifiers.Slack.Username, val, ConfigTypeString) case "consul-alerts/config/notifiers/slack/icon-url": valErr = loadCustomValue(&config.Notifiers.Slack.IconUrl, val, ConfigTypeString) case "consul-alerts/config/notifiers/slack/icon-emoji": valErr = loadCustomValue(&config.Notifiers.Slack.IconEmoji, val, ConfigTypeString) case "consul-alerts/config/notifiers/slack/detailed": valErr = loadCustomValue(&config.Notifiers.Slack.Detailed, val, ConfigTypeBool) // pager-duty notfier config case "consul-alerts/config/notifiers/pagerduty/enabled": valErr = loadCustomValue(&config.Notifiers.PagerDuty.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/pagerduty/service-key": valErr = loadCustomValue(&config.Notifiers.PagerDuty.ServiceKey, val, ConfigTypeString) case "consul-alerts/config/notifiers/pagerduty/client-name": valErr = loadCustomValue(&config.Notifiers.PagerDuty.ClientName, val, ConfigTypeString) case "consul-alerts/config/notifiers/pagerduty/client-url": valErr = loadCustomValue(&config.Notifiers.PagerDuty.ClientUrl, val, ConfigTypeString) // hipchat notfier config case "consul-alerts/config/notifiers/hipchat/enabled": valErr = loadCustomValue(&config.Notifiers.HipChat.Enabled, val, ConfigTypeBool) case "consul-alerts/config/notifiers/hipchat/cluster-name": valErr = loadCustomValue(&config.Notifiers.HipChat.ClusterName, val, ConfigTypeString) case "consul-alerts/config/notifiers/hipchat/room-id": valErr = loadCustomValue(&config.Notifiers.HipChat.RoomId, val, ConfigTypeString) case "consul-alerts/config/notifiers/hipchat/auth-token": valErr = loadCustomValue(&config.Notifiers.HipChat.AuthToken, val, ConfigTypeString) case "consul-alerts/config/notifiers/hipchat/base-url": valErr = loadCustomValue(&config.Notifiers.HipChat.BaseURL, val, ConfigTypeString) } if valErr != nil { log.Printf(`unable to load custom value for "%s". Using default instead. 
Error: %s`, key, valErr.Error()) } } } else { log.Println("Unable to load custom config, using default instead:", err) } }
func (c *ConsulAlertClient) updateHealthCheck(key string, health *Check) { kvpair, _, _ := c.api.KV().Get(key, nil) val := kvpair.Value var storedStatus Status json.Unmarshal(val, &storedStatus) // no status change if the stored status and latest status is the same noStatusChange := storedStatus.Current == health.Status // new pending status if it's a new status and it's not the same as the pending status newPendingStatus := storedStatus.Current != health.Status && storedStatus.Pending != health.Status // status is still pending for change. will change if it reaches threshold stillPendingStatus := storedStatus.Current != health.Status && storedStatus.Pending == health.Status switch { case noStatusChange: if storedStatus.Pending != "" { storedStatus.Pending = "" storedStatus.PendingTimestamp = time.Time{} log.Printf( "%s:%s:%s is now back to %s.", health.Node, health.ServiceName, health.Name, storedStatus.Current, ) } case newPendingStatus: storedStatus.Pending = health.Status storedStatus.PendingTimestamp = time.Now() log.Printf( "%s:%s:%s is now pending status change from %s to %s.", health.Node, health.ServiceName, health.Name, storedStatus.Current, storedStatus.Pending, ) case stillPendingStatus: duration := time.Since(storedStatus.PendingTimestamp) if int(duration.Seconds()) >= c.config.Checks.ChangeThreshold { log.Printf( "%s:%s:%s has changed status from %s to %s.", health.Node, health.ServiceName, health.Name, storedStatus.Current, storedStatus.Pending, ) storedStatus.Current = storedStatus.Pending storedStatus.CurrentTimestamp = time.Now() storedStatus.Pending = "" storedStatus.PendingTimestamp = time.Time{} storedStatus.ForNotification = true } else { log.Printf( "%s:%s:%s is pending status change from %s to %s for %s.", health.Node, health.ServiceName, health.Name, storedStatus.Current, storedStatus.Pending, duration, ) } } storedStatus.HealthCheck = health data, _ := json.Marshal(storedStatus) c.api.KV().Put(&consulapi.KVPair{Key: key, Value: data}, nil) 
}
func daemonMode(arguments map[string]interface{}) { // Define options before setting in either config file or on command line loglevelString := "" consulAclToken := "" consulAddr := "" consulDc := "" watchChecks := false watchEvents := false addr := "" var confData map[string]interface{} // This exists check only works for arguments with no default. arguments with defaults will always exist. // Because of this the current code overrides command line flags with config file options if set. if configFile, exists := arguments["--config-file"].(string); exists { file, err := ioutil.ReadFile(configFile) if err != nil { log.Error(err) } err = json.Unmarshal(file, &confData) if err != nil { log.Error(err) } log.Debug("Config data: ", confData) } if confData["log-level"] != nil { loglevelString = confData["log-level"].(string) } else { loglevelString = arguments["--log-level"].(string) } if confData["consul-acl-token"] != nil { consulAclToken = confData["consul-acl-token"].(string) } else { consulAclToken = arguments["--consul-acl-token"].(string) } if confData["consul-addr"] != nil { consulAddr = confData["consul-addr"].(string) } else { consulAddr = arguments["--consul-addr"].(string) } if confData["consul-dc"] != nil { consulDc = confData["consul-dc"].(string) } else { consulDc = arguments["--consul-dc"].(string) } if confData["alert-addr"] != nil { addr = confData["alert-addr"].(string) } else { addr = arguments["--alert-addr"].(string) } if confData["watch-checks"] != nil { watchChecks = confData["watch-checks"].(bool) } else { watchChecks = arguments["--watch-checks"].(bool) } if confData["watch-events"] != nil { watchEvents = confData["watch-events"].(bool) } else { watchEvents = arguments["--watch-events"].(bool) } if loglevelString != "" { loglevel, err := log.ParseLevel(loglevelString) if err == nil { log.SetLevel(loglevel) } else { log.Println("Log level not set:", err) } } url := fmt.Sprintf("http://%s/v1/info", addr) resp, err := http.Get(url) if err == nil && 
resp.StatusCode == 201 { version := resp.Header.Get("version") resp.Body.Close() log.Printf("consul-alert daemon already running version: %s", version) os.Exit(1) } consulClient, err = consul.NewClient(consulAddr, consulDc, consulAclToken) if err != nil { log.Println("Cluster has no leader or is unreacheable.", err) os.Exit(3) } hostname, _ := os.Hostname() log.Println("Consul ACL Token:", consulAclToken) log.Println("Consul Alerts daemon started") log.Println("Consul Alerts Host:", hostname) log.Println("Consul Agent:", consulAddr) log.Println("Consul Datacenter:", consulDc) leaderCandidate := startLeaderElection(consulAddr, consulDc, consulAclToken) notifEngine := startNotifEngine() ep := startEventProcessor() cp := startCheckProcessor(leaderCandidate, notifEngine) http.HandleFunc("/v1/info", infoHandler) http.HandleFunc("/v1/process/events", ep.eventHandler) http.HandleFunc("/v1/process/checks", cp.checkHandler) http.HandleFunc("/v1/health/wildcard", healthWildcardHandler) http.HandleFunc("/v1/health", healthHandler) go startAPI(addr) log.Println("Started Consul-Alerts API") if watchChecks { go runWatcher(consulAddr, consulDc, addr, loglevelString, "checks") } if watchEvents { go runWatcher(consulAddr, consulDc, addr, loglevelString, "event") } ch := make(chan os.Signal) signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM, os.Interrupt) <-ch cleanup(notifEngine, cp, ep, leaderCandidate) }