func processChecks() { for { <-checksChannel // if there's no leader, let's retry for at least 30 seconds in 5 second intervals. retryCount := 0 for !hasLeader() { if retryCount >= 6 { continue } log.Println("There is current no consul-alerts leader... waiting for one.") time.Sleep(5 * time.Second) retryCount++ } if !leaderCandidate.leader { log.Println("Currently not the leader. Ignoring checks.") continue } log.Println("Running health check.") changeThreshold := consulClient.CheckChangeThreshold() for elapsed := 0; elapsed < changeThreshold; elapsed += 10 { consulClient.UpdateCheckData() time.Sleep(10 * time.Second) } consulClient.UpdateCheckData() log.Println("Processing health checks for notification.") alerts := consulClient.NewAlerts() if len(alerts) > 0 { notify(alerts) } } }
func (c *CheckProcessor) handleChecks(checks []consul.Check) { consulClient.LoadConfig() retryCount := 0 for !hasLeader() { if retryCount >= 6 { return } log.Println("There is current no consul-alerts leader... waiting for one.") time.Sleep(5 * time.Second) retryCount++ } if !c.leaderElection.leader { log.Println("Currently not the leader. Ignoring checks.") return } log.Println("Running health check.") changeThreshold := consulClient.CheckChangeThreshold() for elapsed := 0; elapsed < changeThreshold; elapsed += 10 { consulClient.UpdateCheckData() time.Sleep(10 * time.Second) } consulClient.UpdateCheckData() log.Println("Processing health checks for notification.") alerts := consulClient.NewAlerts() if len(alerts) > 0 { c.notify(alerts) } }
func (influxdb *InfluxdbNotifier) Notify(messages Messages) bool { // Make client influxdbClient, err := client.NewHTTPClient(client.HTTPConfig{ Addr: influxdb.Host, Username: influxdb.Username, Password: influxdb.Password, }) if err != nil { log.Println("unable to access influxdb. can't send notification. ", err) return false } bp, _ := client.NewBatchPoints(client.BatchPointsConfig{ Database: influxdb.Database, Precision: "ms", }) influxdb.toBatchPoints(messages, bp) err = influxdbClient.Write(bp) if err != nil { log.Println("unable to send notifications: ", err) return false } log.Println("influxdb notification sent.") return true }
func watchMode(arguments map[string]interface{}) { loglevelString, _ := arguments["--log-level"].(string) if loglevelString != "" { loglevel, err := log.ParseLevel(loglevelString) if err == nil { log.SetLevel(loglevel) } else { log.Println("Log level not set:", err) } } checkMode := arguments["checks"].(bool) eventMode := arguments["event"].(bool) addr := arguments["--alert-addr"].(string) var watchType string switch { case checkMode: watchType = "checks" case eventMode: watchType = "events" } url := fmt.Sprintf("http://%s/v1/process/%s", addr, watchType) resp, err := http.Post(url, "text/json", os.Stdin) if err != nil { log.Println("consul-alert daemon is not running.", err) os.Exit(2) } else { resp.Body.Close() } }
func (ep *EventProcessor) handleEvents(events []consul.Event) { for _, event := range events { log.Println("----------------------------------------") log.Printf("Processing event %s:\n", event.ID) log.Println("----------------------------------------") eventHandlers := consulClient.EventHandlers(event.Name) for _, eventHandler := range eventHandlers { data, err := json.Marshal(&event) if err != nil { log.Println("Unable to read event: ", event) // then what? } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(eventHandler) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running handler: ", err) } else { log.Printf(">>> \n%s -> %s:\n %s\n", event.ID, eventHandler, output) } } log.Printf("Event Processed.\n\n") } }
func (n *NotifEngine) sendCustom(messages notifier.Messages) { for notifName, notifCmd := range consulClient.CustomNotifiers() { filteredMessages := make(notifier.Messages, 0) for _, m := range messages { if boolVal, exists := m.NotifList[notifName]; (exists && boolVal) || len(m.NotifList) == 0 { filteredMessages = append(filteredMessages, m) } } if len(filteredMessages) == 0 { continue } data, err := json.Marshal(&filteredMessages) if err != nil { log.Println("Unable to read messages: ", err) return } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(notifCmd) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running notifier: ", err) } else { log.Println(">>> notification sent to:", notifCmd) } log.Println(output) } }
func processChecks() { for { <-checksChannel for leaderCandidate.Leader() == "" { log.Println("There is current no consul-alerts leader... waiting for one.") time.Sleep(5 * time.Second) } if !leaderCandidate.IsLeader() { log.Println("Currently not the leader. Ignoring checks.") continue } log.Println("Running health check.") changeThreshold := consulClient.CheckChangeThreshold() for elapsed := 0; elapsed < changeThreshold; elapsed += 10 { consulClient.UpdateCheckData() time.Sleep(10 * time.Second) } consulClient.UpdateCheckData() log.Println("Processing health checks for notification.") alerts := consulClient.NewAlerts() if len(alerts) > 0 { notify(alerts) } } }
func (opsgenie *OpsGenieNotifier) Notify(messages Messages) bool { overallStatus, pass, warn, fail := messages.Summary() client := new(ogcli.OpsGenieClient) client.SetApiKey(opsgenie.ApiKey) alertCli, cliErr := client.Alert() if cliErr != nil { log.Println("Opsgenie notification trouble with client") return false } for _, message := range messages { title := fmt.Sprintf("\n%s:%s:%s is %s.", message.Node, message.Service, message.Check, message.Status) content := fmt.Sprintf(header, opsgenie.ClusterName, overallStatus, fail, warn, pass) content += fmt.Sprintf("\n%s:%s:%s is %s.", message.Node, message.Service, message.Check, message.Status) content += fmt.Sprintf("\n%s", message.Output) // create the alert response, alertErr := opsgenie.Send(alertCli, title, content) if alertErr != nil { log.Println("Opsgenie notification trouble.", response.Status) return false } } log.Println("Opsgenie notification send.") return true }
func (awssns *AwsSnsNotifier) Send(subject string, message string) bool { svc := sns.New(session.New(&aws.Config{ Region: aws.String(awssns.Region), })) params := &sns.PublishInput{ Message: aws.String(message), MessageAttributes: map[string]*sns.MessageAttributeValue{ "Key": { DataType: aws.String("String"), StringValue: aws.String("String"), }, }, MessageStructure: aws.String("messageStructure"), Subject: aws.String(subject), TopicArn: aws.String(awssns.TopicArn), } resp, err := svc.Publish(params) if err != nil { log.Println(err.Error()) return false } log.Println(resp) return true }
// runWatcher runs `consul watch` for the given watch type and blocks until it
// exits.
//
// The watch is told to invoke this same executable (os.Args[0]) with
// "watch <watchType>". The child's stdout and stderr are passed through. When
// the command fails the process exits: with the child's exit status for an
// exec.ExitError, 1 for an exec.Error, and 127 for anything else.
func runWatcher(address, datacenter, watchType string) {
	self := os.Args[0]
	cmd := exec.Command(
		"consul", "watch",
		"-http-addr", address,
		"-datacenter", datacenter,
		"-type", watchType,
		self, "watch", watchType)
	cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr

	err := cmd.Run()
	if err == nil {
		log.Printf("Execution complete.")
		return
	}

	var exitCode int
	switch failure := err.(type) {
	case *exec.ExitError:
		// A failed assertion leaves the zero WaitStatus, as before.
		status, _ := failure.Sys().(syscall.WaitStatus)
		exitCode = status.ExitStatus()
		log.Println("Shutting down watcher --> Exit Code: ", exitCode)
	case *exec.Error:
		exitCode = 1
		log.Println("Shutting down watcher --> Something went wrong running consul watch: ", failure.Error())
	default:
		exitCode = 127
		log.Println("Shutting down watcher --> Unknown error: ", err.Error())
	}
	os.Exit(exitCode)
}
func NewClient(address, dc, aclToken string) (*ConsulAlertClient, error) { config := consulapi.DefaultConfig() config.Address = address config.Datacenter = dc config.Token = aclToken api, _ := consulapi.NewClient(config) alertConfig := DefaultAlertConfig() client := &ConsulAlertClient{ api: api, config: alertConfig, } try := 1 for { try += try log.Println("Checking consul agent connection...") _, err := client.api.Status().Leader() if err != nil { log.Println("Waiting for consul:", err) if try > 10 { return nil, err } time.Sleep(10000 * time.Millisecond) } else { break } } client.LoadConfig() client.UpdateCheckData() return client, nil }
func (beary *BearyNotifier) postToBeary() bool { data, err := json.Marshal(beary) if err != nil { log.Println("Unable to marshal beary payload:", err) return false } log.Debugf("struct = %+v, json = %s", beary, string(data)) b := bytes.NewBuffer(data) if res, err := http.Post(beary.Url, "application/json", b); err != nil { log.Println("Unable to send data to beary:", err) return false } else { defer res.Body.Close() statusCode := res.StatusCode if statusCode != 200 { body, _ := ioutil.ReadAll(res.Body) log.Println("Unable to notify beary:", string(body)) return false } else { log.Println("Beary notification sent.") return true } } }
func (influxdb *InfluxdbNotifier) Notify(messages Messages) bool { config := &client.ClientConfig{ Host: influxdb.Host, Username: influxdb.Username, Password: influxdb.Password, Database: influxdb.Database, } influxdbClient, err := client.New(config) if err != nil { log.Println("unable to access influxdb. can't send notification. ", err) return false } seriesList := influxdb.toSeries(messages) err = influxdbClient.WriteSeries(seriesList) if err != nil { log.Println("unable to send notifications: ", err) return false } log.Println("influxdb notification sent.") return true }
func processEvent(event consul.Event) { log.Println("----------------------------------------") log.Printf("Processing event %s:\n", event.ID) log.Println("----------------------------------------") eventHandlers := consulClient.EventHandlers(event.Name) for _, eventHandler := range eventHandlers { executeEventHandler(event, eventHandler) } log.Printf("Event Processed.\n\n") }
// toWatchObject reads all of reader and decodes it as JSON into v.
//
// Failures are logged rather than returned. When reading fails, v is left
// untouched instead of being filled from a truncated payload; when decoding
// fails, v holds whatever json.Unmarshal stored before the error.
func toWatchObject(reader io.Reader, v interface{}) {
	data, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Println("stdin read error: ", err)
		// A partial read is not a trustworthy payload, so do not decode it.
		return
	}

	if err := json.Unmarshal(data, v); err != nil {
		log.Println("json unmarshall error: ", err)
	}
}
func (emailNotifier *EmailNotifier) Notify(alerts Messages) bool { overAllStatus, pass, warn, fail := alerts.Summary() nodeMap := mapByNodes(alerts) e := EmailData{ ClusterName: emailNotifier.ClusterName, SystemStatus: overAllStatus, FailCount: fail, WarnCount: warn, PassCount: pass, Nodes: nodeMap, } var tmpl *template.Template var err error if emailNotifier.Template == "" { tmpl, err = template.New("base").Parse(defaultTemplate) } else { tmpl, err = template.ParseFiles(emailNotifier.Template) } if err != nil { log.Println("Template error, unable to send email notification: ", err) return false } var body bytes.Buffer if err := tmpl.Execute(&body, e); err != nil { log.Println("Template error, unable to send email notification: ", err) return false } msg := "" msg += fmt.Sprintf("From: \"%s\" <%s>\n", emailNotifier.SenderAlias, emailNotifier.SenderEmail) msg += fmt.Sprintf("Subject: %s is %s\n", emailNotifier.ClusterName, overAllStatus) msg += "MIME-version: 1.0;\nContent-Type: text/html; charset=\"UTF-8\";\n\n" msg += body.String() addr := fmt.Sprintf("%s:%d", emailNotifier.Url, emailNotifier.Port) auth := smtp.PlainAuth("", emailNotifier.Username, emailNotifier.Password, emailNotifier.Url) if err := smtp.SendMail(addr, auth, emailNotifier.SenderEmail, emailNotifier.Receivers, []byte(msg)); err != nil { log.Println("Unable to send notification:", err) return false } log.Println("Email notification sent.") return true }
func (c *CheckProcessor) notify(alerts []consul.Check) { messages := make([]notifier.Message, len(alerts)) for i, alert := range alerts { notifMap, interval := consulClient.GetProfileInfo(alert.Node, alert.ServiceID, alert.CheckID) messages[i] = notifier.Message{ Node: alert.Node, ServiceId: alert.ServiceID, Service: alert.ServiceName, CheckId: alert.CheckID, Check: alert.Name, Status: alert.Status, Output: alert.Output, Notes: alert.Notes, Interval: interval, IntCount: 1, NotifList: notifMap, Timestamp: time.Now(), } if interval > 0 { switch alert.Status { case "passing": consulClient.DeleteReminder(alert.Node) case "warning", "critical": consulClient.SetReminder(messages[i]) } } } if len(messages) == 0 { log.Println("Nothing to notify.") return } c.notifEngine.queueMessages(messages) }
func healthHandler(w http.ResponseWriter, r *http.Request) { node := r.URL.Query().Get("node") service := r.URL.Query().Get("service") check := r.URL.Query().Get("check") log.Println(node, service, check) status, output := consulClient.CheckStatus(node, service, check) var code int switch status { case "passing": code = 200 case "warning", "critical": code = 503 default: status = "unknown" code = 404 } log.Printf("health status check result for node=%s,service=%s,check=%s: %d", node, service, check, code) var result string if output == "" { result = "" } else { result = fmt.Sprintf("output: %s\n", output) } body := fmt.Sprintf("status: %s\n%s", status, result) w.WriteHeader(code) w.Write([]byte(body)) }
// startAPI serves the default HTTP mux on addr and blocks while the server
// runs. If the server stops with an error, the error is logged and the
// process exits with status 1.
func startAPI(addr string) {
	if serveErr := http.ListenAndServe(addr, nil); serveErr != nil {
		log.Println("Error starting Consul-Alerts API", serveErr)
		os.Exit(1)
	}
}
func (influxdb *InfluxdbNotifier) toBatchPoints(messages Messages, bp client.BatchPoints) { seriesName := influxdb.SeriesName for index, message := range messages { tags := map[string]string{ "node": message.Node, "service": message.Service, "status": message.Status, "serviceId": message.ServiceId, } fields := map[string]interface{}{ "checks": message.Check, "notes": message.Notes, "output": message.Output, } p, err := client.NewPoint(seriesName, tags, fields, message.Timestamp) if err != nil { log.Println("Error: ", err.Error()) } log.Debugf("%s", index) bp.AddPoint(p) } }
func notify(alerts []consul.Check) { messages := make([]notifier.Message, len(alerts)) for i, alert := range alerts { messages[i] = notifier.Message{ Node: alert.Node, ServiceId: alert.ServiceID, Service: alert.ServiceName, CheckId: alert.CheckID, Check: alert.Name, Status: alert.Status, Output: alert.Output, Notes: alert.Notes, Timestamp: time.Now(), } } if len(messages) == 0 { log.Println("Nothing to notify.") return } for _, n := range builtinNotifiers() { n.Notify(messages) } for _, n := range consulClient.CustomNotifiers() { executeHealthNotifier(messages, n) } }
// GetProfileInfo returns profile info for check func (c *ConsulAlertClient) GetProfileInfo(node, serviceID, checkID string) (notifiersList map[string]bool, interval int) { log.Println("Getting profile for node: ", node, " service: ", serviceID, " check: ", checkID) var profile string kvPair, _, _ := c.api.KV().Get(fmt.Sprintf("consul-alerts/config/notif-selection/services/%s", serviceID), nil) if kvPair != nil { profile = string(kvPair.Value) log.Println("service selection key found.") } else if kvPair, _, _ = c.api.KV().Get(fmt.Sprintf("consul-alerts/config/notif-selection/checks/%s", checkID), nil); kvPair != nil { profile = string(kvPair.Value) log.Println("check selection key found.") } else if kvPair, _, _ = c.api.KV().Get(fmt.Sprintf("consul-alerts/config/notif-selection/hosts/%s", node), nil); kvPair != nil { profile = string(kvPair.Value) log.Println("host selection key found.") } else { profile = "default" } key := fmt.Sprintf("consul-alerts/config/notif-profiles/%s", profile) log.Println("profile key: ", key) kvPair, _, _ = c.api.KV().Get(key, nil) if kvPair == nil { log.Println("profile key not found.") return } var checkProfile ProfileInfo json.Unmarshal(kvPair.Value, &checkProfile) notifiersList = checkProfile.NotifList interval = checkProfile.Interval log.Println("Interval: ", interval, " List: ", notifiersList) return }
func (ep *EventProcessor) eventHandler(w http.ResponseWriter, r *http.Request) { consulClient.LoadConfig() if ep.firstRun { log.Println("Now watching for events.") ep.firstRun = false // set status to OK return } if !consulClient.EventsEnabled() { log.Println("Event handling disabled. Event ignored.") // set to OK? return } var events []consul.Event toWatchObject(r.Body, &events) ep.inChan <- events // set status to OK }
// GetReminders returns list of reminders func (c *ConsulAlertClient) GetReminders() []notifier.Message { remindersList, _, _ := c.api.KV().List("consul-alerts/reminders", nil) var messages []notifier.Message for _, kvpair := range remindersList { var message notifier.Message json.Unmarshal(kvpair.Value, &message) messages = append(messages, message) } log.Println("Getting reminders") return messages }
func executeEventHandler(event consul.Event, eventHandler string) { data, err := json.Marshal(&event) if err != nil { log.Println("Unable to read event: ", event) // then what? } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(eventHandler) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running handler: ", err) } else { log.Printf(">>> \n%s -> %s:\n %s\n", event.ID, eventHandler, output) } }
func (c *CheckProcessor) reminderRun() { if !c.leaderElection.leader { log.Println("Currently not the leader. Ignoring reminders.") return } log.Println("Running reminder check.") messages := consulClient.GetReminders() filteredMessages := make(notifier.Messages, 0) for _, message := range messages { duration := time.Since(message.RmdCheck) durMins := int(math.Ceil(duration.Minutes())) log.Println("Reminder message duration minutes: ", durMins) if durMins >= message.Interval { message.RmdCheck = time.Now() consulClient.SetReminder(message) filteredMessages = append(filteredMessages, message) } } if len(filteredMessages) > 0 { c.notifEngine.queueMessages(filteredMessages) } }
func executeHealthNotifier(messages []notifier.Message, notifCmd string) { data, err := json.Marshal(&messages) if err != nil { log.Println("Unable to read messages: ", err) return } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(notifCmd) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running notifier: ", err) } else { log.Println(">>> notification sent to:", notifCmd) } log.Println(output) }
func (n *NotifEngine) sendCustom(messages notifier.Messages) { for _, notifCmd := range consulClient.CustomNotifiers() { data, err := json.Marshal(&messages) if err != nil { log.Println("Unable to read messages: ", err) return } input := bytes.NewReader(data) output := new(bytes.Buffer) cmd := exec.Command(notifCmd) cmd.Stdin = input cmd.Stdout = output cmd.Stderr = output if err := cmd.Run(); err != nil { log.Println("error running notifier: ", err) } else { log.Println(">>> notification sent to:", notifCmd) } log.Println(output) } }
func daemonMode(arguments map[string]interface{}) { addr := arguments["--alert-addr"].(string) url := fmt.Sprintf("http://%s/v1/info", addr) resp, err := http.Get(url) if err == nil && resp.StatusCode == 201 { version := resp.Header.Get("version") resp.Body.Close() log.Printf("consul-alert daemon already running version: %s", version) os.Exit(1) } consulAclToken := arguments["--consul-acl-token"].(string) consulAddr := arguments["--consul-addr"].(string) consulDc := arguments["--consul-dc"].(string) watchChecks := arguments["--watch-checks"].(bool) watchEvents := arguments["--watch-events"].(bool) consulClient, err = consul.NewClient(consulAddr, consulDc, consulAclToken) if err != nil { log.Println("Cluster has no leader or is unreacheable.", err) os.Exit(3) } hostname, _ := os.Hostname() log.Println("Consul ACL Token:", consulAclToken) log.Println("Consul Alerts daemon started") log.Println("Consul Alerts Host:", hostname) log.Println("Consul Agent:", consulAddr) log.Println("Consul Datacenter:", consulDc) leaderCandidate := startLeaderElection(consulAddr, consulDc, consulAclToken) notifEngine := startNotifEngine() if watchChecks { go runWatcher(consulAddr, consulDc, "checks") } if watchEvents { go runWatcher(consulAddr, consulDc, "event") } ep := startEventProcessor() cp := startCheckProcessor(leaderCandidate, notifEngine) http.HandleFunc("/v1/info", infoHandler) http.HandleFunc("/v1/process/events", ep.eventHandler) http.HandleFunc("/v1/process/checks", cp.checkHandler) http.HandleFunc("/v1/health", healthHandler) go http.ListenAndServe(addr, nil) ch := make(chan os.Signal) signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM, os.Interrupt) <-ch cleanup(notifEngine, cp, ep, leaderCandidate) }
func (c *CheckProcessor) checkHandler(w http.ResponseWriter, r *http.Request) { consulClient.LoadConfig() if c.firstRun { log.Println("Now watching for health changes.") c.firstRun = false w.WriteHeader(200) return } if !consulClient.ChecksEnabled() { log.Println("Checks handling disabled. Checks ignored.") w.WriteHeader(200) return } if len(c.inChan) == 1 { <-c.inChan } var checks []consul.Check toWatchObject(r.Body, &checks) c.inChan <- checks w.WriteHeader(200) }