Example #1
0
func (s *Schedule) Action(user, message string, t models.ActionType, ak models.AlertKey) error {
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
		slog.Errorln(err)
	}
	st, err := s.DataAccess.State().GetLatestIncident(ak)
	if err != nil {
		return err
	}
	if st == nil {
		return fmt.Errorf("no such alert key: %v", ak)
	}
	isUnknown := st.LastAbnormalStatus == models.StUnknown
	timestamp := utcNow()
	switch t {
	case models.ActionAcknowledge:
		if !st.NeedAck {
			return fmt.Errorf("alert already acknowledged")
		}
		if !st.Open {
			return fmt.Errorf("cannot acknowledge closed alert")
		}
		st.NeedAck = false
		if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
			return err
		}
	case models.ActionClose:
		if st.IsActive() {
			return fmt.Errorf("cannot close active alert")
		}
		fallthrough
	case models.ActionForceClose:
		st.Open = false
		st.End = &timestamp
	case models.ActionForget:
		if !isUnknown {
			return fmt.Errorf("can only forget unknowns")
		}
		fallthrough
	case models.ActionPurge:
		return s.DataAccess.State().Forget(ak)
	default:
		return fmt.Errorf("unknown action type: %v", t)
	}
	// Would like to also track the alert group, but I believe this is impossible because any character
	// that could be used as a delimiter could also be a valid tag key or tag value character
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
		slog.Errorln(err)
	}
	st.Actions = append(st.Actions, models.Action{
		Message: message,
		Time:    timestamp,
		Type:    t,
		User:    user,
	})
	_, err = s.DataAccess.State().UpdateIncidentState(st)
	return err
}
Example #2
0
func (s *Schedule) Unlock() {
	holder := s.mutexHolder
	start := s.mutexAquired
	waitTime := s.mutexWaitTime
	s.mutex.Unlock()
	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "wait"}, waitTime)
	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "hold"}, int64(time.Since(start)/time.Millisecond))
	collect.Add("schedule.lock_count", opentsdb.TagSet{"caller": holder}, 1)
}
Example #3
0
func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (ak models.AlertKey, e error) {
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
		slog.Errorln(err)
	}
	defer func() {
		if e == nil {
			if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
				slog.Errorln(err)
			}
			if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
				e = err
			}
		}
	}()
	isUnknown := st.LastAbnormalStatus == models.StUnknown
	timestamp := utcNow()
	switch t {
	case models.ActionAcknowledge:
		if !st.NeedAck {
			return "", fmt.Errorf("alert already acknowledged")
		}
		if !st.Open {
			return "", fmt.Errorf("cannot acknowledge closed alert")
		}
		st.NeedAck = false
	case models.ActionClose:
		if st.IsActive() {
			return "", fmt.Errorf("cannot close active alert")
		}
		fallthrough
	case models.ActionForceClose:
		st.Open = false
		st.End = &timestamp
	case models.ActionForget:
		if !isUnknown {
			return "", fmt.Errorf("can only forget unknowns")
		}
		fallthrough
	case models.ActionPurge:
		return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
	case models.ActionNote:
		// pass
	default:
		return "", fmt.Errorf("unknown action type: %v", t)
	}
	st.Actions = append(st.Actions, models.Action{
		Message: message,
		Time:    timestamp,
		Type:    t,
		User:    user,
	})
	_, err := s.DataAccess.State().UpdateIncidentState(st)
	return st.AlertKey, err
}
Example #4
0
File: web.go Project: jmj/bosun
func (rp *relayProxy) ServeHTTP(responseWriter http.ResponseWriter, r *http.Request) {
	clean := func(s string) string {
		return opentsdb.MustReplace(s, "_")
	}
	reader := &passthru{ReadCloser: r.Body}
	r.Body = reader
	w := &relayWriter{ResponseWriter: responseWriter}
	rp.ReverseProxy.ServeHTTP(w, r)
	indexTSDB(r, reader.buf.Bytes())
	tags := opentsdb.TagSet{"path": clean(r.URL.Path), "remote": clean(strings.Split(r.RemoteAddr, ":")[0])}
	collect.Add("relay.bytes", tags, int64(reader.buf.Len()))
	tags["status"] = strconv.Itoa(w.code)
	collect.Add("relay.response", tags, 1)
}
Example #5
0
func (s *Schedule) Action(user, message string, t ActionType, ak expr.AlertKey) error {
	s.Lock("Action")
	defer s.Unlock()
	st := s.status[ak]
	if st == nil {
		return fmt.Errorf("no such alert key: %v", ak)
	}
	ack := func() {
		delete(s.Notifications, ak)
		st.NeedAck = false
	}
	isUnknown := st.AbnormalStatus() == StUnknown
	isError := st.AbnormalStatus() == StError
	timestamp := time.Now().UTC()
	switch t {
	case ActionAcknowledge:
		if !st.NeedAck {
			return fmt.Errorf("alert already acknowledged")
		}
		if !st.Open {
			return fmt.Errorf("cannot acknowledge closed alert")
		}
		ack()
	case ActionClose:
		if st.NeedAck {
			ack()
		}
		if st.IsActive() && !isError {
			return fmt.Errorf("cannot close active alert")
		}
		st.Open = false
		last := st.Last()
		if last.IncidentId != 0 {
			s.incidentLock.Lock()
			if incident, ok := s.Incidents[last.IncidentId]; ok {
				incident.End = &timestamp
			}
			s.incidentLock.Unlock()
		}
	case ActionForget:
		if !isUnknown {
			return fmt.Errorf("can only forget unknowns")
		}
		if st.NeedAck {
			ack()
		}
		st.Open = false
		st.Forgotten = true
		delete(s.status, ak)
	default:
		return fmt.Errorf("unknown action type: %v", t)
	}
	st.Action(user, message, t, timestamp)
	// Would like to also track the alert group, but I believe this is impossible because any character
	// that could be used as a delimiter could also be a valid tag key or tag value character
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": ak.Name(), "type": t.String()}, 1); err != nil {
		log.Println(err)
	}
	return nil
}
Example #6
0
File: search.go Project: urog/bosun
func (s *Search) Index(mdp opentsdb.MultiDataPoint) {
	for _, dp := range mdp {
		s.Lock()
		mmap := s.last[dp.Metric]
		if mmap == nil {
			mmap = make(map[string]*database.LastInfo)
			s.last[dp.Metric] = mmap
		}
		p := mmap[dp.Tags.String()]
		if p == nil {
			p = &database.LastInfo{}
			mmap[dp.Tags.String()] = p
		}
		if p.Timestamp < dp.Timestamp {
			if fv, err := getFloat(dp.Value); err == nil {
				p.DiffFromPrev = (fv - p.LastVal) / float64(dp.Timestamp-p.Timestamp)
				p.LastVal = fv
			} else {
				slog.Error(err)
			}
			p.Timestamp = dp.Timestamp
		}
		s.Unlock()
		select {
		case s.indexQueue <- dp:
		default:
			collect.Add("search.dropped", opentsdb.TagSet{}, 1)
		}
	}
}
Example #7
0
func (s *Search) Index(mdp opentsdb.MultiDataPoint) {
	for _, dp := range mdp {
		s.Lock()
		metric := dp.Metric
		key := metric + dp.Tags.String()
		p := s.Last[key]
		if p == nil {
			p = &lastInfo{}
			s.Last[key] = p
		}
		if p.timestamp < dp.Timestamp {
			if fv, err := getFloat(dp.Value); err == nil {
				p.diffFromPrev = (fv - p.lastVal) / float64(dp.Timestamp-p.timestamp)
				p.lastVal = fv
			} else {
				slog.Error(err)
			}
			p.timestamp = dp.Timestamp
		}
		s.Unlock()
		select {
		case s.indexQueue <- dp:
		default:
			collect.Add("search.dropped", opentsdb.TagSet{}, 1)
		}
	}
}
Example #8
0
func (rp *relayProxy) relayPut(responseWriter http.ResponseWriter, r *http.Request, parse bool) {
	isRelayed := r.Header.Get(relayHeader) != ""
	reader := &passthru{ReadCloser: r.Body}
	r.Body = reader
	w := &relayWriter{ResponseWriter: responseWriter}
	rp.TSDBProxy.ServeHTTP(w, r)
	if w.code/100 != 2 {
		verbose("got status %d", w.code)
		return
	}
	verbose("relayed to tsdb")
	collect.Add("puts.relayed", opentsdb.TagSet{}, 1)
	// Send to bosun in a separate go routine so we can end the source's request.
	go func() {
		body := bytes.NewBuffer(reader.buf.Bytes())
		req, err := http.NewRequest(r.Method, bosunIndexURL, body)
		if err != nil {
			verbose("%v", err)
			return
		}
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			verbose("bosun relay error: %v", err)
			return
		}
		resp.Body.Close()
		verbose("bosun relay success")
	}()
	// Parse and denormalize datapoints
	if !isRelayed && parse && denormalizationRules != nil {
		go rp.denormalize(bytes.NewReader(reader.buf.Bytes()))
	}

	if !isRelayed && len(relayPutUrls) > 0 {
		go func() {
			for _, relayUrl := range relayPutUrls {
				body := bytes.NewBuffer(reader.buf.Bytes())
				req, err := http.NewRequest(r.Method, relayUrl, body)
				if err != nil {
					verbose("%v", err)
					return
				}
				req.Header.Set("Content-Type", "application/json")
				req.Header.Set("Content-Encoding", "gzip")
				req.Header.Add(relayHeader, myHost)
				resp, err := http.DefaultClient.Do(req)
				if err != nil {
					verbose("secondary relay error: %v", err)
					return
				}
				resp.Body.Close()
				verbose("secondary relay success")
			}
		}()
	}
}
Example #9
0
func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*Attachment) {
	e := email.NewEmail()
	e.From = c.EmailFrom
	for _, a := range n.Email {
		e.To = append(e.To, a.Address)
	}
	e.Subject = string(subject)
	e.HTML = body
	for _, a := range attachments {
		e.Attach(bytes.NewBuffer(a.Data), a.Filename, a.ContentType)
	}
	if err := Send(e, c.SMTPHost, c.SMTPUsername, c.SMTPPassword); err != nil {
		collect.Add("email.sent_failed", nil, 1)
		log.Printf("failed to send alert %v to %v %v\n", ak, e.To, err)
		return
	}
	collect.Add("email.sent", nil, 1)
	log.Printf("relayed alert %v to %v sucessfully\n", ak, e.To)
}
Example #10
0
func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*Attachment) {
	e := email.NewEmail()
	e.From = c.EmailFrom
	for _, a := range n.Email {
		e.To = append(e.To, a.Address)
	}
	e.Subject = string(subject)
	e.HTML = body
	for _, a := range attachments {
		e.Attach(bytes.NewBuffer(a.Data), a.Filename, a.ContentType)
	}
	e.Headers.Add("X-Bosun-Server", util.Hostname)
	if err := Send(e, c.SMTPHost, c.SMTPUsername, c.SMTPPassword); err != nil {
		collect.Add("email.sent_failed", nil, 1)
		slog.Errorf("failed to send alert %v to %v %v\n", ak, e.To, err)
		return
	}
	collect.Add("email.sent", nil, 1)
	slog.Infof("relayed alert %v to %v sucessfully. Subject: %d bytes. Body: %d bytes.", ak, e.To, len(subject), len(body))
}
Example #11
0
File: web.go Project: jmj/bosun
func indexTSDB(r *http.Request, body []byte) {
	clean := func(s string) string {
		return opentsdb.MustReplace(s, "_")
	}
	if r, err := gzip.NewReader(bytes.NewReader(body)); err == nil {
		body, _ = ioutil.ReadAll(r)
		r.Close()
	}
	var dp opentsdb.DataPoint
	var mdp opentsdb.MultiDataPoint
	if err := json.Unmarshal(body, &mdp); err == nil {
	} else if err = json.Unmarshal(body, &dp); err == nil {
		mdp = opentsdb.MultiDataPoint{&dp}
	}
	if len(mdp) > 0 {
		ra := strings.Split(r.RemoteAddr, ":")[0]
		tags := opentsdb.TagSet{"remote": clean(ra)}
		collect.Add("search.puts_relayed", tags, 1)
		collect.Add("search.datapoints_relayed", tags, int64(len(mdp)))
		schedule.Search.Index(mdp)
	}
}
Example #12
0
func (rp *relayProxy) relayMetadata(responseWriter http.ResponseWriter, r *http.Request) {
	reader := &passthru{ReadCloser: r.Body}
	r.Body = reader
	w := &relayWriter{ResponseWriter: responseWriter}
	rp.BosunProxy.ServeHTTP(w, r)
	if w.code != 204 {
		verbose("got status %d", w.code)
		return
	}
	verbose("relayed metadata to bosun")
	collect.Add("metadata.relayed", opentsdb.TagSet{}, 1)
	if r.Header.Get(relayHeader) != "" {
		return
	}
	if len(relayPutUrls) != 0 {
		go func() {
			for _, relayUrl := range relayPutUrls {
				relayUrl = strings.Replace(relayUrl, "/put", "/metadata/put", 1)
				body := bytes.NewBuffer(reader.buf.Bytes())
				req, err := http.NewRequest(r.Method, relayUrl, body)
				if err != nil {
					verbose("%v", err)
					return
				}
				req.Header.Set("Content-Type", "application/json")
				req.Header.Add(relayHeader, myHost)
				resp, err := http.DefaultClient.Do(req)
				if err != nil {
					verbose("secondary relay error: %v", err)
					return
				}
				resp.Body.Close()
				verbose("secondary relay success")
			}
		}()
	}
}
Example #13
0
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error) {
	if e == nil {
		return
	}
	defer func() {
		if err == nil {
			return
		}
		collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
		slog.Errorln(err)
	}()
	results, err := s.executeExpr(T, rh, a, e)
	if err != nil {
		return nil, err
	}
Loop:
	for _, r := range results.Results {
		if s.Conf.Squelched(a, r.Group) {
			continue
		}
		ak := models.NewAlertKey(a.Name, r.Group)
		for _, v := range ignore {
			if ak == v {
				continue Loop
			}
		}
		var n float64
		n, err = valueToFloat(r.Value)
		if err != nil {
			return
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(models.Event)
			rh.Events[ak] = event
		}
		result := &models.Result{
			Computations: r.Computations,
			Value:        models.Float(n),
			Expr:         e.String(),
		}
		switch checkStatus {
		case models.StWarning:
			event.Warn = result
		case models.StCritical:
			event.Crit = result
		}
		status := checkStatus
		if math.IsNaN(n) {
			status = checkStatus
		} else if n == 0 {
			status = models.StNormal
		}
		if status != models.StNormal {
			alerts = append(alerts, ak)
		}
		if status > rh.Events[ak].Status {
			event.Status = status
		}
	}
	return
}
Example #14
0
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus Status, ignore expr.AlertKeys) (alerts expr.AlertKeys, err error) {
	if e == nil {
		return
	}
	defer func() {
		if err == nil {
			return
		}
		collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
		slog.Errorln(err)
	}()
	results, err := s.executeExpr(T, rh, a, e)
	if err != nil {
		return nil, err
	}
Loop:
	for _, r := range results.Results {
		if s.Conf.Squelched(a, r.Group) {
			continue
		}
		ak := expr.NewAlertKey(a.Name, r.Group)
		for _, v := range ignore {
			if ak == v {
				continue Loop
			}
		}
		var n float64
		switch v := r.Value.(type) {
		case expr.Number:
			n = float64(v)
		case expr.Scalar:
			n = float64(v)
		default:
			err = fmt.Errorf("expected number or scalar")
			return
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(Event)
			rh.Events[ak] = event
		}
		result := &Result{
			Result: r,
			Expr:   e.String(),
		}
		switch checkStatus {
		case StWarning:
			event.Warn = result
		case StCritical:
			event.Crit = result
		}
		status := checkStatus
		if math.IsNaN(n) {
			status = StError
		} else if n == 0 {
			status = StNormal
		}
		if status != StNormal {
			alerts = append(alerts, ak)
		}
		if status > rh.Events[ak].Status {
			event.Status = status
		}
	}
	return
}
Example #15
0
func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error, cancelled bool) {
	if e == nil {
		return
	}
	defer func() {
		if err == nil {
			return
		}
		collect.Add("check.errs", opentsdb.TagSet{"metric": a.Name}, 1)
		slog.Errorln(err)
	}()
	type res struct {
		results *expr.Results
		error   error
	}
	// See s.CheckAlert for an explanation of execution and cancellation with this channel
	rc := make(chan res, 1)
	var results *expr.Results
	go func() {
		results, err := s.executeExpr(T, rh, a, e)
		rc <- res{results, err}
	}()
	select {
	case res := <-rc:
		results = res.results
		err = res.error
	case <-s.runnerContext.Done():
		return nil, nil, true
	}
	if err != nil {
		return
	}
Loop:
	for _, r := range results.Results {
		if s.RuleConf.Squelched(a, r.Group) {
			continue
		}
		ak := models.NewAlertKey(a.Name, r.Group)
		for _, v := range ignore {
			if ak == v {
				continue Loop
			}
		}
		var n float64
		n, err = valueToFloat(r.Value)
		if err != nil {
			return
		}
		event := rh.Events[ak]
		if event == nil {
			event = new(models.Event)
			rh.Events[ak] = event
		}
		result := &models.Result{
			Computations: r.Computations,
			Value:        models.Float(n),
			Expr:         e.String(),
		}
		switch checkStatus {
		case models.StWarning:
			event.Warn = result
		case models.StCritical:
			event.Crit = result
		}
		status := checkStatus
		if math.IsNaN(n) {
			status = checkStatus
		} else if n == 0 {
			status = models.StNormal
		}
		if status != models.StNormal {
			alerts = append(alerts, ak)
		}
		if status > rh.Events[ak].Status {
			event.Status = status
		}
	}
	return
}