// GetUnknownAndUnevalAlertKeys returns the currently unknown and unevaluated
// alert keys for the given alert, read from their respective Redis sets.
func (d *dataAccess) GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUnknownAndUnevalAlertKeys"})()
	conn := d.GetConnection()
	defer conn.Close()

	unknownS, err := redis.Strings(conn.Do("SMEMBERS", statesUnknownKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	unknown := make([]models.AlertKey, len(unknownS))
	for i, u := range unknownS {
		unknown[i] = models.AlertKey(u)
	}

	unEvals, err := redis.Strings(conn.Do("SMEMBERS", statesUnevalKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	unevals := make([]models.AlertKey, len(unEvals))
	for i, u := range unEvals {
		unevals[i] = models.AlertKey(u)
	}

	return unknown, unevals, nil
}
func TestNotifications_RoundTrip(t *testing.T) {
	nd := testData.Notifications()

	notTime := time.Now().UTC().Add(-10 * time.Hour).Truncate(time.Second)
	future := time.Now().UTC().Add(time.Hour).Truncate(time.Second)
	oneMin := time.Now().UTC().Add(time.Minute).Truncate(time.Second)

	// with nothing pending, next time should be an hour from now
	next, err := nd.GetNextNotificationTime()
	check(t, err)
	if next != future {
		t.Fatalf("wrong next time. %s != %s", next, future)
	}

	// add notifications
	err = nd.InsertNotification(models.AlertKey("notak{foo=a}"), "chat", notTime)
	check(t, err)
	err = nd.InsertNotification(models.AlertKey("notak{foo=b}"), "chat", oneMin)
	check(t, err)
	err = nd.InsertNotification(models.AlertKey("notak{foo=c}"), "chat", future)
	check(t, err)

	// next time should be correct
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if next != notTime {
		t.Fatalf("wrong next time. %s != %s", next, notTime)
	}

	// make sure only one due
	due, err := nd.GetDueNotifications()
	check(t, err)
	if len(due) != 1 {
		t.Fatalf("Wrong number of due notifications. %d != %d", len(due), 1)
	}

	// next time should still be correct
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if next != notTime {
		t.Fatalf("wrong next time. %s != %s", next, notTime)
	}

	check(t, nd.ClearNotificationsBefore(notTime))
	// next time should be 1 minute
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if next != oneMin {
		t.Fatalf("wrong next time. %s != %s", next, oneMin)
	}

	check(t, nd.ClearNotifications(models.AlertKey("notak{foo=b}")))
	// next time should be 1 hour
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if next != future {
		t.Fatalf("wrong next time. %s != %s", next, future)
	}
}
func TestDependency_OtherAlert_UnknownChain(t *testing.T) {
	ab := models.AlertKey("a{host=b}")
	bb := models.AlertKey("b{host=b}")
	cb := models.AlertKey("c{host=b}")

	// a is stale (not touched for an hour) so it will go unknown;
	// b and c are freshly touched.
	as := NewStatus(ab)
	as.Touched = queryTime.Add(-time.Hour)
	as.Append(&Event{Status: StNormal})
	bs := NewStatus(bb)
	bs.Touched = queryTime
	bs.Append(&Event{Status: StNormal})
	cs := NewStatus(cb)
	cs.Touched = queryTime
	cs.Append(&Event{Status: StNormal})

	s := testSched(t, &schedTest{
		conf: `
			alert a {
				warn = avg(q("avg:a{host=*}", "5m", "")) && 0
			}
			alert b {
				depends = alert("a", "warn")
				warn = avg(q("avg:b{host=*}", "5m", "")) > 0
			}
			alert c {
				depends = alert("b", "warn")
				warn = avg(q("avg:b{host=*}", "5m", "")) > 0
			}
		`,
		queries: map[string]opentsdb.ResponseSet{
			`q("avg:a{host=*}", ` + window5Min + `)`: {},
			`q("avg:b{host=*}", ` + window5Min + `)`: {{
				Metric: "b",
				Tags:   opentsdb.TagSet{"host": "b"},
				DPS:    map[string]opentsdb.Point{"0": 0},
			}},
		},
		state: map[schedState]bool{
			schedState{string(ab), "unknown"}: true,
		},
		previous: map[models.AlertKey]*State{
			ab: as,
			bb: bs,
			cb: cs,
		},
	})

	if s.status[ab].Unevaluated {
		t.Errorf("should not be unevaluated: %s", ab)
	}
	if !s.status[bb].Unevaluated {
		t.Errorf("should be unevaluated: %s", bb)
	}
	if !s.status[cb].Unevaluated {
		t.Errorf("should be unevaluated: %s", cb)
	}
}
func TestGroupSets_ByAlert(t *testing.T) {
	aks := map[string]string{
		"a{host=a}": "a on a",
		"a{host=b}": "a on b",
		"a{host=c}": "a on c",
		"a{host=d}": "a on d",
	}
	states := States{}
	for a, sub := range aks {
		ak, err := models.ParseAlertKey(a)
		if err != nil {
			t.Fatal(err)
		}
		states[ak] = &models.IncidentState{
			AlertKey: models.AlertKey(a),
			Alert:    ak.Name(),
			Tags:     ak.Group().Tags(),
			Subject:  sub,
		}
	}

	// Above the grouping threshold, each alert key stays in its own group.
	groups := states.GroupSets(5)
	if len(groups) != 4 {
		t.Fatalf("Expected 4 unique groups, but found %d.", len(groups))
	}
	// At the threshold, the four keys collapse into a single group.
	groups = states.GroupSets(4)
	if len(groups) != 1 {
		t.Fatalf("Expected 1 unique group, but found %d.", len(groups))
	}
}
func TestGroupSets_Single(t *testing.T) {
	ak := models.AlertKey("a{host=foo}")
	states := States{
		ak: &models.IncidentState{
			AlertKey: ak,
			Alert:    "a",
			Tags:     opentsdb.TagSet{"host": "foo"}.Tags(),
			Subject:  "aaa",
		},
	}
	groups := states.GroupSets(5)
	if len(groups) != 1 {
		t.Fatalf("Expected 1 group. Found %d.", len(groups))
	}
	if len(groups["a{host=foo}"]) == 0 {
		t.Fatal("Expected alert key but couldn't find it.")
	}
}
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	conn, err := redis.Dial("tcp", server, redis.DialDatabase(db))
	if err != nil {
		return md, slog.Wrap(err)
	}
	defer conn.Close()

	// Detect the proper hash-scan command: ledis uses XHSCAN, redis uses HSCAN.
	hscanCmd := "XHSCAN"
	info, err := redis.String(conn.Do("info", "server"))
	if err != nil {
		return md, slog.Wrap(err)
	}
	if strings.Contains(info, "redis_version") {
		hscanCmd = "HSCAN"
	}

	cursor := "0"
	for {
		vals, err := redis.Values(conn.Do(hscanCmd, collect.RedisCountersKey, cursor))
		if err != nil {
			return md, slog.Wrap(err)
		}
		// A scan reply is always a pair: the next cursor and the field/value list.
		if len(vals) != 2 {
			return md, fmt.Errorf("unexpected number of values")
		}
		cursor, err = redis.String(vals[0], nil)
		if err != nil {
			return md, slog.Wrap(err)
		}
		pairs, err := redis.StringMap(vals[1], nil)
		if err != nil {
			return md, slog.Wrap(err)
		}
		// Each hash field is an alert key; its value is an integer counter.
		for key, val := range pairs {
			ak := models.AlertKey(key)
			v, err := strconv.Atoi(val)
			if err != nil {
				slog.Errorf("Invalid counter value: %s", val)
				continue
			}
			Add(&md, ak.Name(), v, ak.Group(), metadata.Counter, metadata.Count, "")
		}
		// A returned cursor of "0" (or empty) means the scan is complete.
		if cursor == "" || cursor == "0" {
			break
		}
	}
	return md, nil
}
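// seedRedisCounter is a hypothetical helper, not part of the source above: it
// only illustrates the data shape c_redis_counters consumes, a hash at
// collect.RedisCountersKey whose fields parse as models.AlertKey values
// ("metric{tag=val}") and whose values are integer counts. HINCRBY creates
// the hash and field if missing and increments atomically.
func seedRedisCounter(conn redis.Conn, field string, delta int) error {
	_, err := conn.Do("HINCRBY", collect.RedisCountersKey, field, delta)
	return err
}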
// GetUntouchedSince returns the alert keys for the given alert whose
// last-touched score is at or before the given Unix timestamp.
func (d *dataAccess) GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) {
	conn := d.Get()
	defer conn.Close()
	results, err := redis.Strings(conn.Do("ZRANGEBYSCORE", statesLastTouchedKey(alert), "-inf", time))
	if err != nil {
		return nil, slog.Wrap(err)
	}
	aks := make([]models.AlertKey, len(results))
	for i := range results {
		aks[i] = models.AlertKey(results[i])
	}
	return aks, nil
}
// GetUntouchedSince returns the alert keys for the given alert whose
// last-touched score is at or before the given Unix timestamp.
func (d *dataAccess) GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUntouchedSince"})()
	conn := d.GetConnection()
	defer conn.Close()
	results, err := redis.Strings(conn.Do("ZRANGEBYSCORE", statesLastTouchedKey(alert), "-inf", time))
	if err != nil {
		return nil, slog.Wrap(err)
	}
	aks := make([]models.AlertKey, len(results))
	for i := range results {
		aks[i] = models.AlertKey(results[i])
	}
	return aks, nil
}
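// touchAlertKey is a hypothetical write-side counterpart (assumed, not shown
// in this section): a ZADD with the current Unix time as the score is what
// would let GetUntouchedSince find stale alert keys via ZRANGEBYSCORE -inf <time>.
func touchAlertKey(conn redis.Conn, ak models.AlertKey, now int64) error {
	_, err := conn.Do("ZADD", statesLastTouchedKey(ak.Name()), now, string(ak))
	return err
}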
// GetUnknownAndUnevalAlertKeys returns the currently unknown and unevaluated
// alert keys for the given alert, read from their respective Redis sets.
func (d *dataAccess) GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) {
	conn := d.Get()
	defer conn.Close()

	unknownS, err := redis.Strings(conn.Do("SMEMBERS", statesUnknownKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	unknown := make([]models.AlertKey, len(unknownS))
	for i, u := range unknownS {
		unknown[i] = models.AlertKey(u)
	}

	unEvals, err := redis.Strings(conn.Do("SMEMBERS", statesUnevalKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	unevals := make([]models.AlertKey, len(unEvals))
	for i, u := range unEvals {
		unevals[i] = models.AlertKey(u)
	}

	return unknown, unevals, nil
}
// GetDueNotifications returns, per alert key, the notifications whose
// scheduled send time has already passed.
func (d *dataAccess) GetDueNotifications() (map[models.AlertKey]map[string]time.Time, error) {
	conn := d.Get()
	defer conn.Close()
	m, err := redis.Int64Map(conn.Do("ZRANGEBYSCORE", pendingNotificationsKey, 0, time.Now().UTC().Unix(), "WITHSCORES"))
	if err != nil {
		return nil, slog.Wrap(err)
	}
	results := map[models.AlertKey]map[string]time.Time{}
	for key, t := range m {
		// Members have the form "<alertkey>:<notification>"; split on the
		// last ":" since the alert key itself may contain colons.
		last := strings.LastIndex(key, ":")
		if last == -1 {
			continue
		}
		ak, not := models.AlertKey(key[:last]), key[last+1:]
		if results[ak] == nil {
			results[ak] = map[string]time.Time{}
		}
		results[ak][not] = time.Unix(t, 0).UTC()
	}
	return results, nil
}
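// pendingMember is a hypothetical sketch of the ZSET member format that
// GetDueNotifications parses above (the real writer is InsertNotification,
// exercised in TestNotifications_RoundTrip but not shown here): the alert key
// and notification name joined by ":", scored by the due time in Unix seconds.
func pendingMember(ak models.AlertKey, notification string) string {
	return string(ak) + ":" + notification
}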
func TestActionNotificationGrouping(t *testing.T) {
	defer setup()()
	c, err := rule.NewConf("", conf.EnabledBackends{}, `
		template t {
			subject = 2
		}
		notification n1 {
			print = true
		}
		notification n2 {
			print = true
		}
		notification n3 {
			print = true
			runOnActions = true
		}
		notification n4 {
			print = true
			runOnActions = false
		}
		alert a {
			template = t
			warnNotification = n1
			critNotification = n2
			warnNotification = n4
			crit = 1
			warn = 1
		}
		alert b {
			template = t
			warnNotification = n2
			critNotification = n3
			crit = 1
			warn = 1
		}
		lookup byHost {
			entry host=a {
				main_contact = n2
			}
			entry host=b {
				main_contact = n3
			}
		}
		alert c {
			template = t
			warnNotification = n1
			warnNotification = lookup("byHost", "main_contact")
			warn = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, c)
	if err != nil {
		t.Fatal(err)
	}
	awarn := models.AlertKey("a{host=w}")
	acrit := models.AlertKey("a{host=c}")
	bwarn := models.AlertKey("b{host=w}")
	bcrit := models.AlertKey("b{host=c}")
	cA := models.AlertKey("c{host=a}")
	cB := models.AlertKey("c{host=b}")
	da := s.DataAccess.State()
	da.UpdateIncidentState(&models.IncidentState{AlertKey: awarn, Alert: awarn.Name(), Tags: awarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: acrit, Alert: acrit.Name(), Tags: acrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: bwarn, Alert: bwarn.Name(), Tags: bwarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: bcrit, Alert: bcrit.Name(), Tags: bcrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: cA, Alert: cA.Name(), Tags: cA.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: cB, Alert: cB.Name(), Tags: cB.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	groups, err := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB})
	if err != nil {
		t.Fatal(err)
	}
	expect := func(not string, aks ...models.AlertKey) {
		n := c.Notifications[not]
		actualAks, ok := groups[n]
		if !ok {
			t.Fatalf("Notification %s not present in groupings.", not)
		}
		if len(actualAks) != len(aks) {
			t.Fatalf("Count mismatch for grouping %s. %d != %d.", not, len(actualAks), len(aks))
		}
		for i, ak := range aks {
			if actualAks[i].AlertKey != ak {
				t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey, ak)
			}
		}
	}
	expect("n1", awarn, cA, cB)
	expect("n2", acrit, bwarn, cA)
	expect("n3", bcrit, cB)
}
func TestDependency_OtherAlert_UnknownChain(t *testing.T) {
	defer setup()()
	ab := models.AlertKey("a{host=b}")
	bb := models.AlertKey("b{host=b}")
	cb := models.AlertKey("c{host=b}")
	s := testSched(t, &schedTest{
		conf: `
			alert a {
				warn = avg(q("avg:a{host=*}", "5m", "")) && 0
			}
			alert b {
				depends = alert("a", "warn")
				warn = avg(q("avg:b{host=*}", "5m", "")) > 0
			}
			alert c {
				depends = alert("b", "warn")
				warn = avg(q("avg:b{host=*}", "5m", "")) > 0
			}
		`,
		queries: map[string]opentsdb.ResponseSet{
			`q("avg:a{host=*}", ` + window5Min + `)`: {},
			`q("avg:b{host=*}", ` + window5Min + `)`: {{
				Metric: "b",
				Tags:   opentsdb.TagSet{"host": "b"},
				DPS:    map[string]opentsdb.Point{"0": 0},
			}},
		},
		state: map[schedState]bool{
			schedState{string(ab), "unknown"}: true,
		},
		touched: map[models.AlertKey]time.Time{
			ab: queryTime.Add(-time.Hour),
			bb: queryTime,
			cb: queryTime,
		},
	})
	check := func(ak models.AlertKey, expec bool) {
		_, uneval, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(ak.Name())
		if err != nil {
			t.Fatal(err)
		}
		for _, ak2 := range uneval {
			if ak2 == ak {
				if !expec {
					t.Fatalf("Should not be unevaluated: %s", ak)
				}
				return
			}
		}
		if expec {
			t.Fatalf("Should be unevaluated: %s", ak)
		}
	}
	check(ab, false)
	check(bb, true)
	check(cb, true)
}
func TestActionNotificationGrouping(t *testing.T) {
	c, err := conf.New("", `
		template t {
			subject = 2
		}
		notification n1 {
			print = true
		}
		notification n2 {
			print = true
		}
		notification n3 {
			print = true
			runOnActions = true
		}
		notification n4 {
			print = true
			runOnActions = false
		}
		alert a {
			template = t
			warnNotification = n1
			critNotification = n2
			warnNotification = n4
			crit = 1
			warn = 1
		}
		alert b {
			template = t
			warnNotification = n2
			critNotification = n3
			crit = 1
			warn = 1
		}
		lookup byHost {
			entry host=a {
				main_contact = n2
			}
			entry host=b {
				main_contact = n3
			}
		}
		alert c {
			template = t
			warnNotification = n1
			warnNotification = lookup("byHost", "main_contact")
			warn = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(c)
	if err != nil {
		t.Fatal(err)
	}
	awarn := models.AlertKey("a{host=w}")
	acrit := models.AlertKey("a{host=c}")
	bwarn := models.AlertKey("b{host=w}")
	bcrit := models.AlertKey("b{host=c}")
	cA := models.AlertKey("c{host=a}")
	cB := models.AlertKey("c{host=b}")
	s.status[awarn] = &State{Alert: "a", Group: opentsdb.TagSet{"host": "w"}, History: []Event{{Status: StWarning}}}
	s.status[acrit] = &State{Alert: "a", Group: opentsdb.TagSet{"host": "c"}, History: []Event{{Status: StCritical}}}
	s.status[bwarn] = &State{Alert: "b", Group: opentsdb.TagSet{"host": "w"}, History: []Event{{Status: StWarning}}}
	s.status[bcrit] = &State{Alert: "b", Group: opentsdb.TagSet{"host": "c"}, History: []Event{{Status: StCritical}}}
	s.status[cA] = &State{Alert: "c", Group: opentsdb.TagSet{"host": "a"}, History: []Event{{Status: StWarning}}}
	s.status[cB] = &State{Alert: "c", Group: opentsdb.TagSet{"host": "b"}, History: []Event{{Status: StWarning}}}
	groups := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB})
	expect := func(not string, aks ...models.AlertKey) {
		n := c.Notifications[not]
		actualAks, ok := groups[n]
		if !ok {
			t.Fatalf("Notification %s not present in groupings.", not)
		}
		if len(actualAks) != len(aks) {
			t.Fatalf("Count mismatch for grouping %s. %d != %d.", not, len(actualAks), len(aks))
		}
		for i, ak := range aks {
			if actualAks[i].AlertKey() != ak {
				t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey(), ak)
			}
		}
	}
	expect("n1", awarn, cA, cB)
	expect("n2", acrit, bwarn, cA)
	expect("n3", bcrit, cB)
}