// TestCheckNotifyUnknownDefault tests the default unknownTemplate. func TestCheckNotifyUnknownDefault(t *testing.T) { defer setup()() nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) nc <- string(b) })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(` template t { subject = template } notification n { post = http://%s/ } alert a { template = t critNotification = n crit = 1 } `, u.Host)) if err != nil { t.Fatal(err) } s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c) if err != nil { t.Fatal(err) } r := &RunHistory{ Events: map[models.AlertKey]*models.Event{ models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown}, models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown}, }, } s.RunHistory(r) s.CheckNotifications() s.sendUnknownNotifications() gotExpected := false Loop: for { select { case r := <-nc: if r == "a: 2 unknown alerts" { gotExpected = true } else { t.Fatalf("unexpected: %v", r) } // TODO: remove this silly timeout-based test case <-time.After(time.Second): break Loop } } if !gotExpected { t.Errorf("didn't get expected result") } }
func ConfigTest(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) { b, err := ioutil.ReadAll(r.Body) if err != nil { serveError(w, err) return } if len(b) == 0 { serveError(w, fmt.Errorf("empty config")) return } _, err = rule.NewConf("test", schedule.SystemConf.EnabledBackends(), string(b)) if err != nil { fmt.Fprintf(w, err.Error()) } }
func TestIncidentIds(t *testing.T) { defer setup()() c, err := rule.NewConf("", conf.EnabledBackends{}, ` alert a { crit = 1 } `) if err != nil { t.Fatal(err) } s, _ := initSched(&conf.SystemConf{}, c) ak := models.NewAlertKey("a", nil) r := &RunHistory{ Events: map[models.AlertKey]*models.Event{ ak: {Status: models.StWarning}, }, } expect := func(id int64) { incident, err := s.DataAccess.State().GetLatestIncident(ak) if err != nil { t.Fatal(err) } if incident.Id != id { t.Fatalf("Expeted incident id %d. Got %d.", id, incident.Id) } } s.RunHistory(r) expect(1) r.Events[ak].Status = models.StNormal s.RunHistory(r) expect(1) r.Events[ak].Status = models.StWarning s.RunHistory(r) expect(1) r.Events[ak].Status = models.StNormal s.RunHistory(r) err = s.ActionByAlertKey("", "", models.ActionClose, ak) if err != nil { t.Fatal(err) } r.Events[ak].Status = models.StWarning s.RunHistory(r) expect(2) }
func TestCheckSilence(t *testing.T) { defer setup()() done := make(chan bool, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { done <- true })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(` template t { subject = "test" body = "test" } notification n { post = http://%s/ } alert a { template = t warnNotification = n warn = 1 } `, u.Host)) if err != nil { t.Fatal(err) } s, err := initSched(&conf.SystemConf{}, c) if err != nil { t.Fatal(err) } _, err = s.AddSilence(utcNow().Add(-time.Hour), utcNow().Add(time.Hour), "a", "", false, true, "", "user", "message") if err != nil { t.Fatal(err) } check(s, utcNow()) s.CheckNotifications() select { case <-done: t.Fatal("silenced notification was sent") case <-time.After(time.Second * 2): // Timeout *probably* means the silence worked } }
func TestErrorTemplate(t *testing.T) { c, err := rule.NewConf("", conf.EnabledBackends{}, ` template t { body = {{.Eval "invalid"}} } alert a { template = t crit = 1 } `) if err != nil { t.Fatal(err) } _, err = procRule(nil, c, c.Alerts["a"], time.Time{}, false, "", "") if err != nil { t.Fatal(err) } }
func TestCheckNotify(t *testing.T) { defer setup()() nc := make(chan string) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) nc <- string(b) })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(` template t { subject = {{.Last.Status}} } notification n { post = http://%s/ } alert a { template = t warnNotification = n warn = 1 } `, u.Host)) if err != nil { t.Fatal(err) } s, err := initSched(&conf.SystemConf{}, c) if err != nil { t.Fatal(err) } check(s, utcNow()) s.CheckNotifications() select { case r := <-nc: if r != "warning" { t.Fatalf("expected warning, got %v", r) } case <-time.After(time.Second): t.Fatal("failed to receive notification before timeout") } }
func TestActionNotificationTemplates(t *testing.T) { c, err := rule.NewConf("", conf.EnabledBackends{}, ``) if err != nil { t.Fatal(err) } s, _ := initSched(&conf.SystemConf{Hostname: "abc"}, c) data := &actionNotificationContext{} data.ActionType = models.ActionAcknowledge data.Message = "Bad things happened" data.User = "******" data.States = []*models.IncidentState{ { Id: 224, Events: []models.Event{ { Status: models.StCritical, }, }, Alert: "xyz", Subject: "Critical!!", }, } data.schedule = s buf := &bytes.Buffer{} err = actionNotificationBodyTemplate.Execute(buf, data) if err != nil { t.Fatal(err) } if !strings.Contains(buf.String(), "http://abc/incident?id=224") { t.Fatal("Expected link to incident in body") } buf = &bytes.Buffer{} err = actionNotificationSubjectTemplate.Execute(buf, data) if err != nil { t.Fatal(err) } if !strings.Contains(buf.String(), "Batman Acknowledged") { t.Fatal("Expected name and actionType in subject") } }
func buildConfig(r *http.Request) (c conf.RuleConfProvider, a *conf.Alert, hash string, err error) { config, err := ioutil.ReadAll(r.Body) if err != nil { return nil, nil, "", err } c, err = rule.NewConf("Test Config", schedule.SystemConf.EnabledBackends(), string(config)) if err != nil { return nil, nil, "", err } hash, err = sched.DefaultSched.DataAccess.Configs().SaveTempConfig(string(config)) if err != nil { return nil, nil, "", err } alertName := r.FormValue("alert") if alertName == "" { return nil, nil, "", fmt.Errorf("must supply alert to run") } a = c.GetAlert(alertName) if a == nil { return nil, nil, "", fmt.Errorf("alert %s not found", alertName) } return c, a, hash, nil }
func TestActionNotificationGrouping(t *testing.T) { defer setup()() c, err := rule.NewConf("", conf.EnabledBackends{}, ` template t{ subject = 2 } notification n1 { print = true } notification n2{ print = true } notification n3{ print = true runOnActions = true } notification n4{ print = true runOnActions = false } alert a { template = t warnNotification = n1 critNotification = n2 warnNotification = n4 crit = 1 warn = 1 } alert b{ template = t warnNotification = n2 critNotification = n3 crit = 1 warn = 1 } lookup byHost{ entry host=a{ main_contact = n2 } entry host=b{ main_contact = n3 } } alert c{ template = t warnNotification = n1 warnNotification = lookup("byHost", "main_contact") warn = 1 } `) if err != nil { t.Fatal(err) } s, err := initSched(&conf.SystemConf{}, c) if err != nil { t.Fatal(err) } awarn := models.AlertKey("a{host=w}") acrit := models.AlertKey("a{host=c}") bwarn := models.AlertKey("b{host=w}") bcrit := models.AlertKey("b{host=c}") cA := models.AlertKey("c{host=a}") cB := models.AlertKey("c{host=b}") da := s.DataAccess.State() da.UpdateIncidentState(&models.IncidentState{AlertKey: awarn, Alert: awarn.Name(), Tags: awarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) da.UpdateIncidentState(&models.IncidentState{AlertKey: acrit, Alert: acrit.Name(), Tags: acrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}}) da.UpdateIncidentState(&models.IncidentState{AlertKey: bwarn, Alert: bwarn.Name(), Tags: bwarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) da.UpdateIncidentState(&models.IncidentState{AlertKey: bcrit, Alert: bcrit.Name(), Tags: bcrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}}) da.UpdateIncidentState(&models.IncidentState{AlertKey: cA, Alert: cA.Name(), Tags: cA.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) da.UpdateIncidentState(&models.IncidentState{AlertKey: cB, Alert: cB.Name(), Tags: cB.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) groups, err := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB}) if err != nil { t.Fatal(err) } expect := func(not string, aks ...models.AlertKey) { n := c.Notifications[not] actualAks, ok := groups[n] if !ok { t.Fatalf("Notification %s not present in groupings.", not) } if len(actualAks) != len(aks) { t.Fatalf("Count mismatch for grouping %s. %d != %d.", not, len(actualAks), len(aks)) } for i, ak := range aks { if actualAks[i].AlertKey != ak { t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey, ak) } } } expect("n1", awarn, cA, cB) expect("n2", acrit, bwarn, cA) expect("n3", bcrit, cB) }
func TestCheckNotifyLog(t *testing.T) { defer setup()() nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) nc <- string(b) })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(` template t { subject = {{.Alert.Name}} } notification n { post = http://%s/ } alert a { template = t critNotification = n crit = 1 } alert b { template = t critNotification = n crit = 1 log = true } `, u.Host)) if err != nil { t.Fatal(err) } s, err := initSched(&conf.SystemConf{}, c) if err != nil { t.Fatal(err) } check(s, utcNow()) s.CheckNotifications() gotA := false gotB := false Loop: for { select { case r := <-nc: if r == "a" && !gotA { gotA = true } else if r == "b" && !gotB { gotB = true } else { t.Errorf("unexpected: %v", r) } // TODO: remove this silly timeout-based test case <-time.After(time.Second): break Loop } } if !gotA { t.Errorf("didn't get expected a") } if !gotB { t.Errorf("didn't get expected b") } status, err := s.DataAccess.State().GetAllOpenIncidents() if err != nil { t.Fatal(err) } for _, st := range status { switch st.AlertKey { case "a{}": if !st.Open { t.Errorf("expected a to be open") } case "b{}": if st.Open { t.Errorf("expected b to be closed") } default: t.Errorf("unexpected alert key %s", st.AlertKey) } } }
func TestCheckFlapping(t *testing.T) { defer setup()() c, err := rule.NewConf("", conf.EnabledBackends{}, ` template t { subject = 1 } notification n { print = true } alert a { warnNotification = n warn = 1 critNotification = n crit = 1 template = t } `) if err != nil { t.Fatal(err) } s, _ := initSched(&conf.SystemConf{}, c) ak := models.NewAlertKey("a", nil) r := &RunHistory{ Events: map[models.AlertKey]*models.Event{ ak: {Status: models.StWarning}, }, } hasNots := func() bool { defer func() { s.pendingNotifications = nil }() if len(s.pendingNotifications) != 1 { return false } for k, v := range s.pendingNotifications { if k.Name != "n" || len(v) != 1 || v[0].Alert != "a" { return false } return true } return false } type stateTransition struct { S models.Status ExpectNots bool } transitions := []stateTransition{ {models.StWarning, true}, {models.StNormal, false}, {models.StWarning, false}, {models.StNormal, false}, {models.StCritical, true}, {models.StWarning, false}, {models.StCritical, false}, } for i, trans := range transitions { r.Events[ak].Status = trans.S s.RunHistory(r) has := hasNots() if has && !trans.ExpectNots { t.Fatalf("unexpected notifications for transition %d.", i) } else if !has && trans.ExpectNots { t.Fatalf("expected notifications for transition %d.", i) } } r.Events[ak].Status = models.StNormal s.RunHistory(r) // Close the alert, so it should notify next time. if err := s.ActionByAlertKey("", "", models.ActionClose, ak); err != nil { t.Fatal(err) } r.Events[ak].Status = models.StWarning s.RunHistory(r) if !hasNots() { t.Fatal("expected notification") } }
func testSched(t *testing.T, st *schedTest) (s *Schedule) { bosunStartupTime = time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC) //pretend we've been running for a while. ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { var req opentsdb.Request if err := json.NewDecoder(r.Body).Decode(&req); err != nil { log.Fatal(err) } var resp opentsdb.ResponseSet for _, rq := range req.Queries { qs := fmt.Sprintf(`q("%s", "%v", "%v")`, rq, req.Start, req.End) q, ok := st.queries[qs] if !ok { t.Errorf("unknown query: %s", qs) return } if q == nil { return // Put nil entry in map to simulate opentsdb error. } resp = append(resp, q...) } if err := json.NewEncoder(w).Encode(&resp); err != nil { log.Fatal(err) } })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } //confs := "tsdbHost = " + u.Host + "\n" + st.conf c, err := rule.NewConf("testconf", conf.EnabledBackends{OpenTSDB: true}, st.conf) if err != nil { t.Error(err) t.Logf("conf:\n%s", st.conf) return } time.Sleep(time.Millisecond * 250) sysConf := &conf.SystemConf{CheckFrequency: conf.Duration{Duration: time.Minute * 5}, DefaultRunEvery: 1, UnknownThreshold: 5, MinGroupSize: 5, OpenTSDBConf: conf.OpenTSDBConf{Host: u.Host, ResponseLimit: 1 << 20}} s, _ = initSched(sysConf, c) for ak, time := range st.touched { s.DataAccess.State().TouchAlertKey(ak, time) } check(s, queryTime) groups, err := s.MarshalGroups(new(miniprofiler.Profile), "") if err != nil { t.Error(err) return } var check func(g *StateGroup) check = func(g *StateGroup) { for _, c := range g.Children { check(c) } if g.AlertKey == "" { return } ss := schedState{string(g.AlertKey), g.Status.String()} v, ok := st.state[ss] if !ok { t.Errorf("unexpected state: %s, %s", g.AlertKey, g.Status) return } if v != g.Active { t.Errorf("bad active: %s, %s", g.AlertKey, g.Status) return } delete(st.state, ss) } for _, v := range groups.Groups.NeedAck { check(v) } for _, v := range groups.Groups.Acknowledged { check(v) } for k := range st.state { t.Errorf("unused state: %s", k) } return s }