Esempio n. 1
0
// TestCheckNotifyUnknownDefault tests the default unknownTemplate.
func TestCheckNotifyUnknownDefault(t *testing.T) {
	defer setup()()
	nc := make(chan string, 1)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, _ := ioutil.ReadAll(r.Body)
		nc <- string(b)
	}))
	defer ts.Close()
	u, err := url.Parse(ts.URL)
	if err != nil {
		t.Fatal(err)
	}
	c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(`
		template t {
			subject = template
		}
		notification n {
			post = http://%s/
		}
		alert a {
			template = t
			critNotification = n
			crit = 1
		}
	`, u.Host))
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c)
	if err != nil {
		t.Fatal(err)
	}
	r := &RunHistory{
		Events: map[models.AlertKey]*models.Event{
			models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown},
			models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown},
		},
	}
	s.RunHistory(r)
	s.CheckNotifications()
	s.sendUnknownNotifications()
	gotExpected := false
Loop:
	for {
		select {
		case r := <-nc:
			if r == "a: 2 unknown alerts" {
				gotExpected = true
			} else {
				t.Fatalf("unexpected: %v", r)
			}
		// TODO: remove this silly timeout-based test
		case <-time.After(time.Second):
			break Loop
		}
	}
	if !gotExpected {
		t.Errorf("didn't get expected result")
	}
}
Esempio n. 2
0
func ConfigTest(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) {
	b, err := ioutil.ReadAll(r.Body)
	if err != nil {
		serveError(w, err)
		return
	}
	if len(b) == 0 {
		serveError(w, fmt.Errorf("empty config"))
		return
	}
	_, err = rule.NewConf("test", schedule.SystemConf.EnabledBackends(), string(b))
	if err != nil {
		fmt.Fprintf(w, err.Error())
	}
}
Esempio n. 3
0
func TestIncidentIds(t *testing.T) {
	defer setup()()
	c, err := rule.NewConf("", conf.EnabledBackends{}, `
		alert a {
			crit = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, _ := initSched(&conf.SystemConf{}, c)
	ak := models.NewAlertKey("a", nil)
	r := &RunHistory{
		Events: map[models.AlertKey]*models.Event{
			ak: {Status: models.StWarning},
		},
	}
	expect := func(id int64) {
		incident, err := s.DataAccess.State().GetLatestIncident(ak)
		if err != nil {
			t.Fatal(err)
		}
		if incident.Id != id {
			t.Fatalf("Expeted incident id %d. Got %d.", id, incident.Id)
		}
	}
	s.RunHistory(r)
	expect(1)

	r.Events[ak].Status = models.StNormal
	s.RunHistory(r)
	expect(1)

	r.Events[ak].Status = models.StWarning
	s.RunHistory(r)
	expect(1)

	r.Events[ak].Status = models.StNormal
	s.RunHistory(r)
	err = s.ActionByAlertKey("", "", models.ActionClose, ak)
	if err != nil {
		t.Fatal(err)
	}
	r.Events[ak].Status = models.StWarning
	s.RunHistory(r)
	expect(2)
}
Esempio n. 4
0
func TestCheckSilence(t *testing.T) {
	defer setup()()
	done := make(chan bool, 1)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		done <- true
	}))
	defer ts.Close()
	u, err := url.Parse(ts.URL)
	if err != nil {
		t.Fatal(err)
	}
	c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(`
		template t {
			subject = "test"
			body = "test"
		}
		notification n {
			post = http://%s/
		}
		alert a {
			template = t
			warnNotification = n
			warn = 1
		}
	`, u.Host))
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, c)
	if err != nil {
		t.Fatal(err)
	}
	_, err = s.AddSilence(utcNow().Add(-time.Hour), utcNow().Add(time.Hour), "a", "", false, true, "", "user", "message")
	if err != nil {
		t.Fatal(err)
	}
	check(s, utcNow())
	s.CheckNotifications()
	select {
	case <-done:
		t.Fatal("silenced notification was sent")
	case <-time.After(time.Second * 2):
		// Timeout *probably* means the silence worked
	}
}
Esempio n. 5
0
func TestErrorTemplate(t *testing.T) {
	c, err := rule.NewConf("", conf.EnabledBackends{}, `
		template t {
			body = {{.Eval "invalid"}}
		}
		alert a {
			template = t
			crit = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	_, err = procRule(nil, c, c.Alerts["a"], time.Time{}, false, "", "")
	if err != nil {
		t.Fatal(err)
	}
}
Esempio n. 6
0
func TestCheckNotify(t *testing.T) {
	defer setup()()
	nc := make(chan string)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, _ := ioutil.ReadAll(r.Body)
		nc <- string(b)
	}))
	defer ts.Close()
	u, err := url.Parse(ts.URL)
	if err != nil {
		t.Fatal(err)
	}
	c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(`
		template t {
			subject = {{.Last.Status}}
		}
		notification n {
			post = http://%s/
		}
		alert a {
			template = t
			warnNotification = n
			warn = 1
		}
	`, u.Host))
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, c)
	if err != nil {
		t.Fatal(err)
	}
	check(s, utcNow())
	s.CheckNotifications()
	select {
	case r := <-nc:
		if r != "warning" {
			t.Fatalf("expected warning, got %v", r)
		}
	case <-time.After(time.Second):
		t.Fatal("failed to receive notification before timeout")
	}
}
Esempio n. 7
0
func TestActionNotificationTemplates(t *testing.T) {
	c, err := rule.NewConf("", conf.EnabledBackends{}, ``)
	if err != nil {
		t.Fatal(err)
	}
	s, _ := initSched(&conf.SystemConf{Hostname: "abc"}, c)
	data := &actionNotificationContext{}
	data.ActionType = models.ActionAcknowledge
	data.Message = "Bad things happened"
	data.User = "******"
	data.States = []*models.IncidentState{
		{
			Id: 224,
			Events: []models.Event{
				{
					Status: models.StCritical,
				},
			},
			Alert:   "xyz",
			Subject: "Critical!!",
		},
	}
	data.schedule = s
	buf := &bytes.Buffer{}
	err = actionNotificationBodyTemplate.Execute(buf, data)
	if err != nil {
		t.Fatal(err)
	}
	if !strings.Contains(buf.String(), "http://abc/incident?id=224") {
		t.Fatal("Expected link to incident in body")
	}
	buf = &bytes.Buffer{}
	err = actionNotificationSubjectTemplate.Execute(buf, data)
	if err != nil {
		t.Fatal(err)
	}
	if !strings.Contains(buf.String(), "Batman Acknowledged") {
		t.Fatal("Expected name and actionType in subject")
	}
}
Esempio n. 8
0
func buildConfig(r *http.Request) (c conf.RuleConfProvider, a *conf.Alert, hash string, err error) {
	config, err := ioutil.ReadAll(r.Body)
	if err != nil {
		return nil, nil, "", err
	}
	c, err = rule.NewConf("Test Config", schedule.SystemConf.EnabledBackends(), string(config))
	if err != nil {
		return nil, nil, "", err
	}
	hash, err = sched.DefaultSched.DataAccess.Configs().SaveTempConfig(string(config))
	if err != nil {
		return nil, nil, "", err
	}
	alertName := r.FormValue("alert")
	if alertName == "" {
		return nil, nil, "", fmt.Errorf("must supply alert to run")
	}
	a = c.GetAlert(alertName)
	if a == nil {
		return nil, nil, "", fmt.Errorf("alert %s not found", alertName)
	}
	return c, a, hash, nil

}
Esempio n. 9
0
func TestActionNotificationGrouping(t *testing.T) {
	defer setup()()
	c, err := rule.NewConf("", conf.EnabledBackends{}, `
		template t{
			subject = 2
		}
		notification n1 {
			print = true
		}
		notification n2{
			print = true
		}
		notification n3{
			print = true
			runOnActions = true
		}
		notification n4{
			print = true
			runOnActions = false
		}
		alert a {
			template = t
			warnNotification = n1
			critNotification = n2
			warnNotification = n4
			crit = 1
			warn = 1
		}
		alert b{
			template = t
			warnNotification = n2
			critNotification = n3
			crit = 1
			warn = 1
		}
		lookup byHost{
			entry host=a{
				main_contact = n2
			}
			entry host=b{
				main_contact = n3
			}
		}
		alert c{
			template = t
			warnNotification = n1
			warnNotification = lookup("byHost", "main_contact")
			warn = 1
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, c)
	if err != nil {
		t.Fatal(err)
	}
	awarn := models.AlertKey("a{host=w}")
	acrit := models.AlertKey("a{host=c}")
	bwarn := models.AlertKey("b{host=w}")
	bcrit := models.AlertKey("b{host=c}")
	cA := models.AlertKey("c{host=a}")
	cB := models.AlertKey("c{host=b}")
	da := s.DataAccess.State()
	da.UpdateIncidentState(&models.IncidentState{AlertKey: awarn, Alert: awarn.Name(), Tags: awarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: acrit, Alert: acrit.Name(), Tags: acrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: bwarn, Alert: bwarn.Name(), Tags: bwarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: bcrit, Alert: bcrit.Name(), Tags: bcrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: cA, Alert: cA.Name(), Tags: cA.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})
	da.UpdateIncidentState(&models.IncidentState{AlertKey: cB, Alert: cB.Name(), Tags: cB.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}})

	groups, err := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB})
	if err != nil {
		t.Fatal(err)
	}
	expect := func(not string, aks ...models.AlertKey) {
		n := c.Notifications[not]
		actualAks, ok := groups[n]
		if !ok {
			t.Fatalf("Notification %s not present in groupings.", not)
		}
		if len(actualAks) != len(aks) {
			t.Fatalf("Count mismatch for grouping %s. %d != %d.", not, len(actualAks), len(aks))
		}
		for i, ak := range aks {
			if actualAks[i].AlertKey != ak {
				t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey, ak)
			}
		}
	}
	expect("n1", awarn, cA, cB)
	expect("n2", acrit, bwarn, cA)
	expect("n3", bcrit, cB)
}
Esempio n. 10
0
func TestCheckNotifyLog(t *testing.T) {
	defer setup()()
	nc := make(chan string, 1)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, _ := ioutil.ReadAll(r.Body)
		nc <- string(b)
	}))
	defer ts.Close()
	u, err := url.Parse(ts.URL)
	if err != nil {
		t.Fatal(err)
	}
	c, err := rule.NewConf("", conf.EnabledBackends{}, fmt.Sprintf(`
		template t {
			subject = {{.Alert.Name}}
		}
		notification n {
			post = http://%s/
		}
		alert a {
			template = t
			critNotification = n
			crit = 1
		}
		alert b {
			template = t
			critNotification = n
			crit = 1
			log = true
		}
	`, u.Host))
	if err != nil {
		t.Fatal(err)
	}
	s, err := initSched(&conf.SystemConf{}, c)
	if err != nil {
		t.Fatal(err)
	}
	check(s, utcNow())
	s.CheckNotifications()
	gotA := false
	gotB := false
Loop:
	for {
		select {
		case r := <-nc:
			if r == "a" && !gotA {
				gotA = true
			} else if r == "b" && !gotB {
				gotB = true
			} else {
				t.Errorf("unexpected: %v", r)
			}
		// TODO: remove this silly timeout-based test
		case <-time.After(time.Second):
			break Loop
		}
	}
	if !gotA {
		t.Errorf("didn't get expected a")
	}
	if !gotB {
		t.Errorf("didn't get expected b")
	}
	status, err := s.DataAccess.State().GetAllOpenIncidents()
	if err != nil {
		t.Fatal(err)
	}
	for _, st := range status {
		switch st.AlertKey {
		case "a{}":
			if !st.Open {
				t.Errorf("expected a to be open")
			}
		case "b{}":
			if st.Open {
				t.Errorf("expected b to be closed")
			}
		default:
			t.Errorf("unexpected alert key %s", st.AlertKey)
		}
	}
}
Esempio n. 11
0
func TestCheckFlapping(t *testing.T) {
	defer setup()()
	c, err := rule.NewConf("", conf.EnabledBackends{}, `
		template t {
			subject = 1
		}
		notification n {
			print = true
		}
		alert a {
			warnNotification = n
			warn = 1
			critNotification = n
			crit = 1
			template = t
		}
	`)
	if err != nil {
		t.Fatal(err)
	}
	s, _ := initSched(&conf.SystemConf{}, c)
	ak := models.NewAlertKey("a", nil)
	r := &RunHistory{
		Events: map[models.AlertKey]*models.Event{
			ak: {Status: models.StWarning},
		},
	}
	hasNots := func() bool {
		defer func() {
			s.pendingNotifications = nil
		}()
		if len(s.pendingNotifications) != 1 {
			return false
		}
		for k, v := range s.pendingNotifications {
			if k.Name != "n" || len(v) != 1 || v[0].Alert != "a" {
				return false
			}
			return true
		}
		return false
	}

	type stateTransition struct {
		S          models.Status
		ExpectNots bool
	}
	transitions := []stateTransition{
		{models.StWarning, true},
		{models.StNormal, false},
		{models.StWarning, false},
		{models.StNormal, false},
		{models.StCritical, true},
		{models.StWarning, false},
		{models.StCritical, false},
	}

	for i, trans := range transitions {
		r.Events[ak].Status = trans.S
		s.RunHistory(r)
		has := hasNots()
		if has && !trans.ExpectNots {
			t.Fatalf("unexpected notifications for transition %d.", i)
		} else if !has && trans.ExpectNots {
			t.Fatalf("expected notifications for transition %d.", i)
		}
	}
	r.Events[ak].Status = models.StNormal
	s.RunHistory(r)
	// Close the alert, so it should notify next time.
	if err := s.ActionByAlertKey("", "", models.ActionClose, ak); err != nil {
		t.Fatal(err)
	}
	r.Events[ak].Status = models.StWarning
	s.RunHistory(r)
	if !hasNots() {
		t.Fatal("expected notification")
	}
}
Esempio n. 12
0
func testSched(t *testing.T, st *schedTest) (s *Schedule) {
	bosunStartupTime = time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC) //pretend we've been running for a while.
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req opentsdb.Request
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			log.Fatal(err)
		}
		var resp opentsdb.ResponseSet
		for _, rq := range req.Queries {
			qs := fmt.Sprintf(`q("%s", "%v", "%v")`, rq, req.Start, req.End)
			q, ok := st.queries[qs]
			if !ok {
				t.Errorf("unknown query: %s", qs)
				return
			}
			if q == nil {
				return // Put nil entry in map to simulate opentsdb error.
			}
			resp = append(resp, q...)
		}
		if err := json.NewEncoder(w).Encode(&resp); err != nil {
			log.Fatal(err)
		}
	}))
	defer ts.Close()
	u, err := url.Parse(ts.URL)
	if err != nil {
		t.Fatal(err)
	}
	//confs := "tsdbHost = " + u.Host + "\n" + st.conf
	c, err := rule.NewConf("testconf", conf.EnabledBackends{OpenTSDB: true}, st.conf)
	if err != nil {
		t.Error(err)
		t.Logf("conf:\n%s", st.conf)
		return
	}

	time.Sleep(time.Millisecond * 250)
	sysConf := &conf.SystemConf{CheckFrequency: conf.Duration{Duration: time.Minute * 5}, DefaultRunEvery: 1, UnknownThreshold: 5, MinGroupSize: 5, OpenTSDBConf: conf.OpenTSDBConf{Host: u.Host, ResponseLimit: 1 << 20}}
	s, _ = initSched(sysConf, c)
	for ak, time := range st.touched {
		s.DataAccess.State().TouchAlertKey(ak, time)
	}
	check(s, queryTime)
	groups, err := s.MarshalGroups(new(miniprofiler.Profile), "")
	if err != nil {
		t.Error(err)
		return
	}
	var check func(g *StateGroup)
	check = func(g *StateGroup) {
		for _, c := range g.Children {
			check(c)
		}
		if g.AlertKey == "" {
			return
		}
		ss := schedState{string(g.AlertKey), g.Status.String()}
		v, ok := st.state[ss]
		if !ok {
			t.Errorf("unexpected state: %s, %s", g.AlertKey, g.Status)
			return
		}
		if v != g.Active {
			t.Errorf("bad active: %s, %s", g.AlertKey, g.Status)
			return
		}
		delete(st.state, ss)
	}
	for _, v := range groups.Groups.NeedAck {
		check(v)
	}
	for _, v := range groups.Groups.Acknowledged {
		check(v)
	}
	for k := range st.state {
		t.Errorf("unused state: %s", k)
	}
	return s
}