Exemple #1
0
// GetUnknownAndUnevalAlertKeys reads two Redis sets for the given alert name
// with SMEMBERS: the one addressed by statesUnknownKey and the one addressed
// by statesUnevalKey. It returns their members, converted to models.AlertKey,
// as (unknown, unevaluated). If either read fails, the wrapped error is
// returned together with two nil slices.
func (d *dataAccess) GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUnknownAndUnevalAlertKeys"})()
	conn := d.GetConnection()
	defer conn.Close()

	// toKeys converts raw set members into typed alert keys. The result is
	// always non-nil, even for an empty set.
	toKeys := func(members []string) []models.AlertKey {
		keys := make([]models.AlertKey, 0, len(members))
		for _, m := range members {
			keys = append(keys, models.AlertKey(m))
		}
		return keys
	}

	rawUnknown, err := redis.Strings(conn.Do("SMEMBERS", statesUnknownKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	rawUneval, err := redis.Strings(conn.Do("SMEMBERS", statesUnevalKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	return toKeys(rawUnknown), toKeys(rawUneval), nil
}
Exemple #2
0
// TestNotifications_RoundTrip inserts three pending notifications (one ten
// hours in the past, one a minute ahead, one an hour ahead) and checks that
// GetNextNotificationTime, GetDueNotifications, ClearNotificationsBefore and
// ClearNotifications agree with each other as entries are added and removed.
func TestNotifications_RoundTrip(t *testing.T) {

	nd := testData.Notifications()

	// Derive every reference timestamp from a single clock reading so the
	// three values are mutually consistent.
	now := time.Now().UTC()
	notTime := now.Add(-10 * time.Hour).Truncate(time.Second)
	future := now.Add(time.Hour).Truncate(time.Second)
	oneMin := now.Add(time.Minute).Truncate(time.Second)

	// with nothing pending, next time should be an hour from now
	next, err := nd.GetNextNotificationTime()
	check(t, err)
	// Time values are compared with Equal rather than != so that only the
	// instant matters, not the internal representation (location pointer,
	// monotonic reading).
	if !next.Equal(future) {
		t.Fatalf("wrong next time. %s != %s", next, future)
	}

	// add notifications
	err = nd.InsertNotification(models.AlertKey("notak{foo=a}"), "chat", notTime)
	check(t, err)
	err = nd.InsertNotification(models.AlertKey("notak{foo=b}"), "chat", oneMin)
	check(t, err)
	err = nd.InsertNotification(models.AlertKey("notak{foo=c}"), "chat", future)
	check(t, err)

	// next time should be correct
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if !next.Equal(notTime) {
		t.Fatalf("wrong next time. %s != %s", next, notTime)
	}

	// make sure only one due
	due, err := nd.GetDueNotifications()
	check(t, err)
	if len(due) != 1 {
		t.Fatalf("Wrong number of due notifications. %d != %d", len(due), 1)
	}

	// next time should still be correct: reading the due list must not
	// remove anything
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if !next.Equal(notTime) {
		t.Fatalf("wrong next time. %s != %s", next, notTime)
	}

	check(t, nd.ClearNotificationsBefore(notTime))
	// next time should be 1 minute
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if !next.Equal(oneMin) {
		t.Fatalf("wrong next time. %s != %s", next, oneMin)
	}

	check(t, nd.ClearNotifications(models.AlertKey("notak{foo=b}")))
	// next time should be 1 hour
	next, err = nd.GetNextNotificationTime()
	check(t, err)
	if !next.Equal(future) {
		t.Fatalf("wrong next time. %s != %s", next, future)
	}
}
Exemple #3
0
// TestDependency_OtherAlert_UnknownChain configures three alerts where b
// depends on a and c depends on b, supplies no data for a's query, and marks
// a{host=b} as expected to be "unknown". It then asserts that a{host=b} is not
// flagged Unevaluated while b{host=b} and c{host=b} both are.
func TestDependency_OtherAlert_UnknownChain(t *testing.T) {
	ab := models.AlertKey("a{host=b}")
	bb := models.AlertKey("b{host=b}")
	cb := models.AlertKey("c{host=b}")

	// Previous state for each key. a was last touched an hour before the
	// query time; b and c were touched at the query time.
	as := NewStatus(ab)
	as.Touched = queryTime.Add(-time.Hour)
	as.Append(&Event{Status: StNormal})
	// Each state is built from its own alert key. The original built all
	// three from ab, which looks like a copy/paste slip: the states stored
	// under bb and cb were created for a{host=b}.
	bs := NewStatus(bb)
	bs.Touched = queryTime
	bs.Append(&Event{Status: StNormal})
	cs := NewStatus(cb)
	cs.Touched = queryTime
	cs.Append(&Event{Status: StNormal})
	s := testSched(t, &schedTest{
		conf: `
		alert a {
			warn = avg(q("avg:a{host=*}", "5m", "")) && 0
		}

		alert b {
			depends = alert("a", "warn")
			warn = avg(q("avg:b{host=*}", "5m", "")) > 0 
		}

		alert c {
			depends = alert("b", "warn")
			warn = avg(q("avg:b{host=*}", "5m", "")) > 0
		}
		`,
		queries: map[string]opentsdb.ResponseSet{
			// No series for a; one series for b on host=b.
			`q("avg:a{host=*}", ` + window5Min + `)`: {},
			`q("avg:b{host=*}", ` + window5Min + `)`: {{
				Metric: "b",
				Tags:   opentsdb.TagSet{"host": "b"},
				DPS:    map[string]opentsdb.Point{"0": 0},
			}},
		},
		state: map[schedState]bool{
			schedState{string(ab), "unknown"}: true,
		},
		previous: map[models.AlertKey]*State{
			ab: as,
			bb: bs,
			cb: cs,
		},
	})
	if s.status[ab].Unevaluated {
		t.Errorf("should not be unevaluated: %s", ab)
	}
	if !s.status[bb].Unevaluated {
		t.Errorf("should be unevaluated: %s", bb)
	}
	if !s.status[cb].Unevaluated {
		t.Errorf("should be unevaluated: %s", cb)
	}
}
Exemple #4
0
// TestGroupSets_ByAlert builds four incident states for the same alert "a"
// on hosts a through d, each with a distinct subject, and checks the number
// of groups returned by States.GroupSets: four groups when called with 5 and
// a single group when called with 4.
func TestGroupSets_ByAlert(t *testing.T) {
	subjects := map[string]string{
		"a{host=a}": "a on a",
		"a{host=b}": "a on b",
		"a{host=c}": "a on c",
		"a{host=d}": "a on d",
	}

	states := States{}
	for raw, subject := range subjects {
		parsed, err := models.ParseAlertKey(raw)
		if err != nil {
			t.Fatal(err)
		}
		states[parsed] = &models.IncidentState{
			AlertKey: models.AlertKey(raw),
			Alert:    parsed.Name(),
			Tags:     parsed.Group().Tags(),
			Subject:  subject,
		}
	}

	if got := len(states.GroupSets(5)); got != 4 {
		t.Fatalf("Expected 4 unique groups, but found %d.", got)
	}

	if got := len(states.GroupSets(4)); got != 1 {
		t.Fatalf("Expected 1 unique group, but found %d.", got)
	}
}
Exemple #5
0
// TestGroupSets_Single checks that a States value holding exactly one
// incident yields exactly one group from GroupSets(5), and that the group
// stored under "a{host=foo}" is non-empty.
func TestGroupSets_Single(t *testing.T) {
	ak := models.AlertKey("a{host=foo}")
	only := &models.IncidentState{
		AlertKey: ak,
		Alert:    "a",
		Tags:     opentsdb.TagSet{"host": "foo"}.Tags(),
		Subject:  "aaa",
	}
	states := States{ak: only}

	groups := states.GroupSets(5)
	if n := len(groups); n != 1 {
		t.Fatalf("Expected 1 group. Found %d.", n)
	}
	if members := groups["a{host=foo}"]; len(members) == 0 {
		t.Fatal("Expected alert key but couldn't find it.")
	}
}
Exemple #6
0
// c_redis_counters dials the given server over TCP, selects database db, and
// walks the hash stored at collect.RedisCountersKey with a cursor-based scan.
// Each hash field is treated as an alert key: its name and tag group label a
// counter data point whose value is the field's integer value. Fields whose
// value does not parse as an integer are logged and skipped.
//
// On any connection, command or reply-decoding error the data points gathered
// so far are returned together with the error.
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	conn, err := redis.Dial("tcp", server, redis.DialDatabase(db))
	if err != nil {
		return md, slog.Wrap(err)
	}
	defer conn.Close()

	// Pick the scan command for the backend: a server whose "info server"
	// output mentions redis_version gets HSCAN, anything else (ledis, per
	// the original comment) gets XHSCAN.
	serverInfo, err := redis.String(conn.Do("info", "server"))
	if err != nil {
		return md, slog.Wrap(err)
	}
	scanCmd := "HSCAN"
	if !strings.Contains(serverInfo, "redis_version") {
		scanCmd = "XHSCAN"
	}

	// Keep scanning until the server hands back an empty or "0" cursor.
	cursor := "0"
	for more := true; more; more = cursor != "" && cursor != "0" {
		reply, err := redis.Values(conn.Do(scanCmd, collect.RedisCountersKey, cursor))
		if err != nil {
			return md, slog.Wrap(err)
		}
		// A scan reply is expected to be [next cursor, field/value list].
		if len(reply) != 2 {
			return md, fmt.Errorf("Unexpected number of values")
		}
		if cursor, err = redis.String(reply[0], nil); err != nil {
			return md, slog.Wrap(err)
		}
		counters, err := redis.StringMap(reply[1], nil)
		if err != nil {
			return md, slog.Wrap(err)
		}
		for name, raw := range counters {
			count, convErr := strconv.Atoi(raw)
			if convErr != nil {
				slog.Errorf("Invalid counter value: %s", raw)
				continue
			}
			ak := models.AlertKey(name)
			Add(&md, ak.Name(), count, ak.Group(), metadata.Counter, metadata.Count, "")
		}
	}
	return md, nil
}
Exemple #7
0
// GetUntouchedSince returns the members of the sorted set addressed by
// statesLastTouchedKey(alert) whose score lies between -inf and time
// (ZRANGEBYSCORE), converted to models.AlertKey. A failed query is returned
// as a wrapped error with a nil slice.
func (d *dataAccess) GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) {
	conn := d.Get()
	defer conn.Close()

	members, err := redis.Strings(conn.Do("ZRANGEBYSCORE", statesLastTouchedKey(alert), "-inf", time))
	if err != nil {
		return nil, slog.Wrap(err)
	}

	keys := make([]models.AlertKey, 0, len(members))
	for _, m := range members {
		keys = append(keys, models.AlertKey(m))
	}
	return keys, nil
}
Exemple #8
0
// GetUntouchedSince queries the sorted set addressed by
// statesLastTouchedKey(alert) for every member scored from -inf up to time
// and returns those members as alert keys. Errors from Redis are wrapped and
// returned with a nil slice.
func (d *dataAccess) GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) {
	defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUntouchedSince"})()
	conn := d.GetConnection()
	defer conn.Close()

	reply, err := conn.Do("ZRANGEBYSCORE", statesLastTouchedKey(alert), "-inf", time)
	members, err := redis.Strings(reply, err)
	if err != nil {
		return nil, slog.Wrap(err)
	}

	untouched := make([]models.AlertKey, len(members))
	for i, member := range members {
		untouched[i] = models.AlertKey(member)
	}
	return untouched, nil
}
Exemple #9
0
// GetUnknownAndUnevalAlertKeys fetches, for the given alert name, the members
// of the set at statesUnknownKey and the set at statesUnevalKey and returns
// them as (unknown, unevaluated) alert keys. Either SMEMBERS call failing
// yields nil slices and the wrapped error.
func (d *dataAccess) GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) {
	conn := d.Get()
	defer conn.Close()

	unknownMembers, err := redis.Strings(conn.Do("SMEMBERS", statesUnknownKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}
	unevalMembers, err := redis.Strings(conn.Do("SMEMBERS", statesUnevalKey(alert)))
	if err != nil {
		return nil, nil, slog.Wrap(err)
	}

	// Both reads succeeded; convert the raw strings to typed keys.
	unknown := make([]models.AlertKey, 0, len(unknownMembers))
	for _, member := range unknownMembers {
		unknown = append(unknown, models.AlertKey(member))
	}
	unevals := make([]models.AlertKey, 0, len(unevalMembers))
	for _, member := range unevalMembers {
		unevals = append(unevals, models.AlertKey(member))
	}
	return unknown, unevals, nil
}
Exemple #10
0
// GetDueNotifications returns every entry of the pendingNotificationsKey
// sorted set whose score lies between 0 and the current Unix time. Each
// member is split at its last ':' into an alert key (left part) and a
// notification name (right part); the score is interpreted as Unix seconds
// and reported as a UTC time. Members without a ':' are ignored.
//
// The result maps alert key -> notification name -> scheduled time.
func (d *dataAccess) GetDueNotifications() (map[models.AlertKey]map[string]time.Time, error) {
	conn := d.Get()
	defer conn.Close()

	now := time.Now().UTC().Unix()
	scored, err := redis.Int64Map(conn.Do("ZRANGEBYSCORE", pendingNotificationsKey, 0, now, "WITHSCORES"))
	if err != nil {
		return nil, slog.Wrap(err)
	}

	due := make(map[models.AlertKey]map[string]time.Time)
	for member, unixSec := range scored {
		// The alert key itself may contain ':' so split on the last one.
		sep := strings.LastIndex(member, ":")
		if sep < 0 {
			continue
		}
		ak := models.AlertKey(member[:sep])
		name := member[sep+1:]

		byName, ok := due[ak]
		if !ok {
			byName = map[string]time.Time{}
			due[ak] = byName
		}
		byName[name] = time.Unix(unixSec, 0).UTC()
	}
	return due, nil
}
Exemple #11
0
// TestActionNotificationGrouping loads a rule configuration with four
// notifications and three alerts, stores one incident per alert key, and
// checks how groupActionNotifications distributes the six alert keys:
//
//	n1 -> a{host=w}, c{host=a}, c{host=b}
//	n2 -> a{host=c}, b{host=w}, c{host=a}
//	n3 -> b{host=c}, c{host=b}
//
// Both membership and order within each group are asserted.
func TestActionNotificationGrouping(t *testing.T) {
	defer setup()()
	ruleConf, err := rule.NewConf("", conf.EnabledBackends{}, `
		template t{
			subject = 2
		}
		notification n1 {
			print = true
		}
		notification n2{
			print = true
		}
		notification n3{
			print = true
			runOnActions = true
		}
		notification n4{
			print = true
			runOnActions = false
		}
		alert a {
			template = t
			warnNotification = n1
			critNotification = n2
			warnNotification = n4
			crit = 1
			warn = 1
		}
		alert b{
			template = t
			warnNotification = n2
			critNotification = n3
			crit = 1
			warn = 1
		}
		lookup byHost{
			entry host=a{
				main_contact = n2
			}
			entry host=b{
				main_contact = n3
			}
		}
		alert c{
			template = t
			warnNotification = n1
			warnNotification = lookup("byHost", "main_contact")
			warn = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, ruleConf)
	if err != nil {
		t.Fatal(err)
	}

	awarn := models.AlertKey("a{host=w}")
	acrit := models.AlertKey("a{host=c}")
	bwarn := models.AlertKey("b{host=w}")
	bcrit := models.AlertKey("b{host=c}")
	cA := models.AlertKey("c{host=a}")
	cB := models.AlertKey("c{host=b}")

	// warning / critical build an incident for ak whose worst status and
	// single event both carry the corresponding severity.
	warning := func(ak models.AlertKey) *models.IncidentState {
		return &models.IncidentState{
			AlertKey:    ak,
			Alert:       ak.Name(),
			Tags:        ak.Group().Tags(),
			WorstStatus: models.StWarning,
			Events:      []models.Event{{Status: models.StWarning}},
		}
	}
	critical := func(ak models.AlertKey) *models.IncidentState {
		return &models.IncidentState{
			AlertKey:    ak,
			Alert:       ak.Name(),
			Tags:        ak.Group().Tags(),
			WorstStatus: models.StCritical,
			Events:      []models.Event{{Status: models.StCritical}},
		}
	}

	store := s.DataAccess.State()
	store.UpdateIncidentState(warning(awarn))
	store.UpdateIncidentState(critical(acrit))
	store.UpdateIncidentState(warning(bwarn))
	store.UpdateIncidentState(critical(bcrit))
	store.UpdateIncidentState(warning(cA))
	store.UpdateIncidentState(warning(cB))

	groups, err := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB})
	if err != nil {
		t.Fatal(err)
	}

	// expect fails the test unless the named notification is grouped with
	// exactly the given alert keys, in the given order.
	expect := func(name string, want ...models.AlertKey) {
		got, found := groups[ruleConf.Notifications[name]]
		if !found {
			t.Fatalf("Notification %s not present in groupings.", name)
		}
		if len(got) != len(want) {
			t.Fatalf("Count mismatch for grouping %s. %d != %d.", name, len(got), len(want))
		}
		for i := range want {
			if got[i].AlertKey != want[i] {
				t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, got[i].AlertKey, want[i])
			}
		}
	}
	expect("n1", awarn, cA, cB)
	expect("n2", acrit, bwarn, cA)
	expect("n3", bcrit, cB)
}
Exemple #12
0
// TestDependency_OtherAlert_UnknownChain sets up a dependency chain (b
// depends on a, c depends on b), gives a's query no data, and marks a{host=b}
// as expected to be "unknown". After the scheduler run it reads the
// unevaluated set for each alert from the state store and asserts that
// a{host=b} is absent from it while b{host=b} and c{host=b} are present.
func TestDependency_OtherAlert_UnknownChain(t *testing.T) {
	defer setup()()
	ab := models.AlertKey("a{host=b}")
	bb := models.AlertKey("b{host=b}")
	cb := models.AlertKey("c{host=b}")

	s := testSched(t, &schedTest{
		conf: `
		alert a {
			warn = avg(q("avg:a{host=*}", "5m", "")) && 0
		}

		alert b {
			depends = alert("a", "warn")
			warn = avg(q("avg:b{host=*}", "5m", "")) > 0 
		}

		alert c {
			depends = alert("b", "warn")
			warn = avg(q("avg:b{host=*}", "5m", "")) > 0
		}
		`,
		queries: map[string]opentsdb.ResponseSet{
			// a returns nothing; b returns one series for host=b.
			`q("avg:a{host=*}", ` + window5Min + `)`: {},
			`q("avg:b{host=*}", ` + window5Min + `)`: {{
				Metric: "b",
				Tags:   opentsdb.TagSet{"host": "b"},
				DPS:    map[string]opentsdb.Point{"0": 0},
			}},
		},
		state: map[schedState]bool{
			schedState{string(ab), "unknown"}: true,
		},
		// a was last touched an hour before the query time, b and c at it.
		touched: map[models.AlertKey]time.Time{
			ab: queryTime.Add(-time.Hour),
			bb: queryTime,
			cb: queryTime,
		},
	})

	// check asserts whether ak is (wantUneval) or is not (!wantUneval)
	// listed among the unevaluated keys of its alert.
	check := func(ak models.AlertKey, wantUneval bool) {
		_, unevalKeys, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(ak.Name())
		if err != nil {
			t.Fatal(err)
		}
		listed := false
		for _, candidate := range unevalKeys {
			if candidate == ak {
				listed = true
				break
			}
		}
		switch {
		case listed && !wantUneval:
			t.Fatalf("Should not be unevaluated: %s", ak)
		case !listed && wantUneval:
			t.Fatalf("Should be unevaluated: %s", ak)
		}
	}
	check(ab, false)
	check(bb, true)
	check(cb, true)
}
Exemple #13
0
// TestActionNotificationGrouping parses a configuration with four
// notifications and three alerts, seeds the scheduler's in-memory status map
// with one state per alert key, and verifies the grouping produced by
// groupActionNotifications:
//
//	n1 -> a{host=w}, c{host=a}, c{host=b}
//	n2 -> a{host=c}, b{host=w}, c{host=a}
//	n3 -> b{host=c}, c{host=b}
//
// Both membership and order within each group are asserted.
func TestActionNotificationGrouping(t *testing.T) {
	parsed, err := conf.New("", `
		template t{
			subject = 2
		}
		notification n1 {
			print = true
		}
		notification n2{
			print = true
		}
		notification n3{
			print = true
			runOnActions = true
		}
		notification n4{
			print = true
			runOnActions = false
		}
		alert a {
			template = t
			warnNotification = n1
			critNotification = n2
			warnNotification = n4
			crit = 1
			warn = 1
		}
		alert b{
			template = t
			warnNotification = n2
			critNotification = n3
			crit = 1
			warn = 1
		}
		lookup byHost{
			entry host=a{
				main_contact = n2
			}
			entry host=b{
				main_contact = n3
			}
		}
		alert c{
			template = t
			warnNotification = n1
			warnNotification = lookup("byHost", "main_contact")
			warn = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(parsed)
	if err != nil {
		t.Fatal(err)
	}

	awarn := models.AlertKey("a{host=w}")
	acrit := models.AlertKey("a{host=c}")
	bwarn := models.AlertKey("b{host=w}")
	bcrit := models.AlertKey("b{host=c}")
	cA := models.AlertKey("c{host=a}")
	cB := models.AlertKey("c{host=b}")

	// Seed one state per key, each with a single history event at the
	// severity the key's name suggests.
	s.status[awarn] = &State{
		Alert:   "a",
		Group:   opentsdb.TagSet{"host": "w"},
		History: []Event{{Status: StWarning}},
	}
	s.status[acrit] = &State{
		Alert:   "a",
		Group:   opentsdb.TagSet{"host": "c"},
		History: []Event{{Status: StCritical}},
	}
	s.status[bwarn] = &State{
		Alert:   "b",
		Group:   opentsdb.TagSet{"host": "w"},
		History: []Event{{Status: StWarning}},
	}
	s.status[bcrit] = &State{
		Alert:   "b",
		Group:   opentsdb.TagSet{"host": "c"},
		History: []Event{{Status: StCritical}},
	}
	s.status[cA] = &State{
		Alert:   "c",
		Group:   opentsdb.TagSet{"host": "a"},
		History: []Event{{Status: StWarning}},
	}
	s.status[cB] = &State{
		Alert:   "c",
		Group:   opentsdb.TagSet{"host": "b"},
		History: []Event{{Status: StWarning}},
	}

	groups := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB})

	// expect fails the test unless the named notification is grouped with
	// exactly the given alert keys, in the given order.
	expect := func(name string, want ...models.AlertKey) {
		got, found := groups[parsed.Notifications[name]]
		if !found {
			t.Fatalf("Notification %s not present in groupings.", name)
		}
		if len(got) != len(want) {
			t.Fatalf("Count mismatch for grouping %s. %d != %d.", name, len(got), len(want))
		}
		for i := range want {
			if got[i].AlertKey() != want[i] {
				t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, got[i].AlertKey(), want[i])
			}
		}
	}
	expect("n1", awarn, cA, cB)
	expect("n2", acrit, bwarn, cA)
	expect("n3", bcrit, cB)
}