// TestCheckNotifyUnknownDefault tests the default unknownTemplate. func TestCheckNotifyUnknownDefault(t *testing.T) { s := new(Schedule) nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) nc <- string(b) })) defer ts.Close() u, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } c, err := conf.New("", fmt.Sprintf(` template t { subject = template } notification n { post = http://%s/ } alert a { template = t critNotification = n crit = 1 } `, u.Host)) if err != nil { t.Fatal(err) } c.StateFile = "" err = s.Init(c) if err != nil { t.Fatal(err) } r := &RunHistory{ Events: map[expr.AlertKey]*Event{ expr.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: StUnknown}, expr.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: StUnknown}, }, } s.RunHistory(r) s.CheckNotifications() gotExpected := false Loop: for { select { case r := <-nc: if r == "a: 2 unknown alerts" { gotExpected = true } else { t.Fatalf("unexpected: %v", r) } // TODO: remove this silly timeout-based test case <-time.After(time.Second): break Loop } } if !gotExpected { t.Errorf("didn't get expected result") } }
func TestError_To_Unknown(t *testing.T) { ak := expr.NewAlertKey("a", nil) state := NewStatus(ak) state.Touched = queryTime.Add(-10 * time.Minute) state.Append(&Event{Status: StError, Time: state.Touched}) s := testSched(t, &schedTest{ conf: `alert a { crit = avg(q("avg:m{a=*}", "5m", "")) > 0 }`, queries: map[string]opentsdb.ResponseSet{ `q("avg:m{a=*}", ` + window5Min + `)`: {}, }, state: map[schedState]bool{ //No abnormal events }, previous: map[expr.AlertKey]*State{ ak: state, }, }) st := s.GetStatus(expr.AlertKey(ak)) if st.Status() != StError { t.Errorf("Expected status to be %s but was %s", StError, st.Status()) } }
func TestIncidentIds(t *testing.T) { s := new(Schedule) c, err := conf.New("", ` alert a { crit = 1 } `) if err != nil { t.Fatal(err) } c.StateFile = "" s.Init(c) ak := expr.NewAlertKey("a", nil) r := &RunHistory{ Events: map[expr.AlertKey]*Event{ ak: {Status: StWarning}, }, } expect := func(id uint64) { if s.status[ak].Last().IncidentId != id { t.Fatalf("Expeted incident id %d. Got %d.", id, s.status[ak].Last().IncidentId) } } s.RunHistory(r) expect(1) r.Events[ak].Status = StNormal r.Events[ak].IncidentId = 0 s.RunHistory(r) expect(1) r.Events[ak].Status = StWarning r.Events[ak].IncidentId = 0 s.RunHistory(r) expect(1) r.Events[ak].Status = StNormal r.Events[ak].IncidentId = 0 s.RunHistory(r) err = s.Action("", "", ActionClose, ak) if err != nil { t.Fatal(err) } r.Events[ak].Status = StWarning r.Events[ak].IncidentId = 0 s.RunHistory(r) expect(2) }
// TestCheckCritUnknownEmpty checks that if an alert goes normal -> crit -> // unknown, it's body and subject are empty. This is because we should not // keep around the crit template renders if we are unknown. func TestCheckCritUnknownEmpty(t *testing.T) { s := new(Schedule) c, err := conf.New("", ` template t { subject = 1 body = 2 } alert a { crit = 1 template = t } `) if err != nil { t.Fatal(err) } c.StateFile = "" s.Init(c) ak := expr.NewAlertKey("a", nil) r := &RunHistory{ Events: map[expr.AlertKey]*Event{ ak: {Status: StNormal}, }, } verify := func(empty bool) { st := s.GetStatus(ak) if empty { if st.Body != "" || st.Subject != "" { t.Fatalf("expected empty body and subject") } } else { if st.Body != "<html><head></head><body>2</body></html>" || st.Subject != "1" { t.Fatalf("expected body and subject") } } } s.RunHistory(r) verify(true) r.Events[ak].Status = StCritical s.RunHistory(r) verify(false) r.Events[ak].Status = StUnknown s.RunHistory(r) verify(true) r.Events[ak].Status = StNormal s.RunHistory(r) verify(true) }
func readDps(r io.Reader, data map[expr.AlertKey]int) { gr, err := gzip.NewReader(r) if err != nil { fatal(err) } jr := json.NewDecoder(gr) mdp := []*opentsdb.DataPoint{} err = jr.Decode(&mdp) if err != nil { fatal(err) } for _, dp := range mdp { ak := expr.NewAlertKey(dp.Metric, dp.Tags) n, ok := data[ak] if ok { data[ak] = n + 1 } else { data[ak] = 1 } } }
func TestDifferentSchedules(t *testing.T) { s := new(Schedule) c, err := conf.New("", ` alert a { crit = 1 runEvery = 3 } alert b { crit = 1 runEvery = 1 } `) if err != nil { t.Fatal(err) } c.StateFile = "" check := func(interval uint64, alerts ...string) { s.Init(c) _, err = s.Check(nil, time.Now(), interval) if err != nil { t.Fatal(err) } if len(alerts) != len(s.status) { t.Errorf("Expected %d statuses, but have %d for interval %d.", len(alerts), len(s.status), interval) } for _, alert := range alerts { if state, ok := s.status[expr.NewAlertKey(alert, nil)]; !ok || state.Status() != StCritical { t.Fatalf("Expected results for alert %s in interval %d.", alert, interval) } } } check(0, "a", "b") check(1, "b") check(2, "b") check(3, "a", "b") }
func (s *Schedule) executeExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr) (*expr.Results, error) { if e == nil { return nil, nil } results, _, err := e.Execute(rh.Context, rh.GraphiteContext, rh.Logstash, rh.Cache, T, rh.Start, 0, a.UnjoinedOK, s.Search, s.Conf.AlertSquelched(a), rh) if err != nil { ak := expr.NewAlertKey(a.Name, nil) rh.Events[ak] = &Event{ Status: StError, Error: &Result{ Result: &expr.Result{ Computations: []expr.Computation{ { Text: e.String(), Value: err.Error(), }, }, }, }, } return nil, err } return results, err }
// AlertKey returns the alert key built from the state's Alert and Group
// fields.
func (s *State) AlertKey() expr.AlertKey {
	key := expr.NewAlertKey(s.Alert, s.Group)
	return key
}
func (c *Conf) loadLookup(s *parse.SectionNode) { name := s.Name.Text if _, ok := c.Lookups[name]; ok { c.errorf("duplicate lookup name: %s", name) } l := Lookup{ Name: name, } l.Text = s.RawText var lookupTags opentsdb.TagSet saw := make(map[string]bool) for _, n := range s.Nodes.Nodes { c.at(n) switch n := n.(type) { case *parse.SectionNode: if n.SectionType.Text != "entry" { c.errorf("unexpected subsection type") } tags, err := opentsdb.ParseTags(n.Name.Text) if tags == nil && err != nil { c.error(err) } if _, ok := saw[tags.String()]; ok { c.errorf("duplicate entry") } saw[tags.String()] = true if len(tags) == 0 { c.errorf("lookup entries require tags") } empty := make(opentsdb.TagSet) for k := range tags { empty[k] = "" } if len(lookupTags) == 0 { lookupTags = empty for k := range empty { l.Tags = append(l.Tags, k) } } else if !lookupTags.Equal(empty) { c.errorf("lookup tags mismatch, expected %v", lookupTags) } e := Entry{ Def: n.RawText, Name: n.Name.Text, ExprEntry: &ExprEntry{ AlertKey: expr.NewAlertKey("", tags), Values: make(map[string]string), }, } for _, en := range n.Nodes.Nodes { c.at(en) switch en := en.(type) { case *parse.PairNode: e.Values[en.Key.Text] = en.Val.Text default: c.errorf("unexpected node") } } l.Entries = append(l.Entries, &e) default: c.errorf("unexpected node") } } c.at(s) c.Lookups[name] = &l }
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus Status, ignore expr.AlertKeys) (alerts expr.AlertKeys, err error) { if e == nil { return } defer func() { if err == nil { return } collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1) slog.Errorln(err) }() results, err := s.executeExpr(T, rh, a, e) if err != nil { return nil, err } Loop: for _, r := range results.Results { if s.Conf.Squelched(a, r.Group) { continue } ak := expr.NewAlertKey(a.Name, r.Group) for _, v := range ignore { if ak == v { continue Loop } } var n float64 switch v := r.Value.(type) { case expr.Number: n = float64(v) case expr.Scalar: n = float64(v) default: err = fmt.Errorf("expected number or scalar") return } event := rh.Events[ak] if event == nil { event = new(Event) rh.Events[ak] = event } result := &Result{ Result: r, Expr: e.String(), } switch checkStatus { case StWarning: event.Warn = result case StCritical: event.Crit = result } status := checkStatus if math.IsNaN(n) { status = StError } else if n == 0 { status = StNormal } if status != StNormal { alerts = append(alerts, ak) } if status > rh.Events[ak].Status { event.Status = status } } return }
func TestCheckFlapping(t *testing.T) { s := new(Schedule) c, err := conf.New("", ` template t { subject = 1 } notification n { print = true } alert a { warnNotification = n warn = 1 critNotification = n crit = 1 template = t } `) if err != nil { t.Fatal(err) } c.StateFile = "" s.Init(c) ak := expr.NewAlertKey("a", nil) r := &RunHistory{ Events: map[expr.AlertKey]*Event{ ak: {Status: StWarning}, }, } hasNots := func() bool { defer func() { s.notifications = nil }() if len(s.notifications) != 1 { return false } for k, v := range s.notifications { if k.Name != "n" || len(v) != 1 || v[0].Alert != "a" { return false } return true } return false } s.RunHistory(r) if !hasNots() { t.Fatalf("expected notification: %v", s.notifications) } r.Events[ak].Status = StNormal s.RunHistory(r) if hasNots() { t.Fatal("unexpected notification") } r.Events[ak].Status = StWarning s.RunHistory(r) if hasNots() { t.Fatal("unexpected notification") } r.Events[ak].Status = StNormal s.RunHistory(r) if hasNots() { t.Fatal("unexpected notification") } r.Events[ak].Status = StCritical s.RunHistory(r) if !hasNots() { t.Fatal("expected notification") } r.Events[ak].Status = StNormal s.RunHistory(r) if hasNots() { t.Fatal("unexpected notification") } s.RunHistory(r) // Close the alert, so it should notify next time. if err := s.Action("", "", ActionClose, ak); err != nil { t.Fatal(err) } r.Events[ak].Status = StWarning s.RunHistory(r) if !hasNots() { t.Fatal("expected notification") } }