Beispiel #1
0
func (s *Schedule) sendUnknownNotifications() {
	slog.Info("Batching and sending unknown notifications")
	defer slog.Info("Done sending unknown notifications")
	for n, states := range s.pendingUnknowns {
		ustates := make(States)
		for _, st := range states {
			ustates[st.AlertKey] = st
		}
		var c int
		tHit := false
		oTSets := make(map[string]models.AlertKeys)
		groupSets := ustates.GroupSets(s.SystemConf.GetMinGroupSize())
		for name, group := range groupSets {
			c++
			if c >= s.SystemConf.GetUnknownThreshold() && s.SystemConf.GetUnknownThreshold() > 0 {
				if !tHit && len(groupSets) == 0 {
					// If the threshold is hit but only 1 email remains, just send the normal unknown
					s.unotify(name, group, n)
					break
				}
				tHit = true
				oTSets[name] = group
			} else {
				s.unotify(name, group, n)
			}
		}
		if len(oTSets) > 0 {
			s.utnotify(oTSets, n)
		}
	}
	s.pendingUnknowns = make(map[*conf.Notification][]*models.IncidentState)
}
Beispiel #2
0
// load stored last data from redis
func (s *Search) loadLast() {
	s.Lock()
	defer s.Unlock()
	slog.Info("Loading last datapoints from redis")
	m, err := s.DataAccess.Search().LoadLastInfos()
	if err != nil {
		slog.Error(err)
	} else {
		s.last = m
	}
	slog.Info("Done")
}
Beispiel #3
0
Datei: aws.go Projekt: urog/bosun
func c_aws(accessKey, secretKey, region string) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	creds := credentials.NewStaticCredentials(accessKey, secretKey, "")
	conf := &aws.Config{
		Credentials: creds,
		Region:      &region,
	}
	ecc := ec2.New(conf)
	if ecc == nil {
		return nil, fmt.Errorf("unable to login to EC2")
	}
	as := autoscaling.New(conf)
	if as == nil {
		return nil, fmt.Errorf("unable to login to AutoScaling")
	}
	elb := elb.New(conf)
	if elb == nil {
		return nil, fmt.Errorf("unable to login to ELB")
	}
	cw := cloudwatch.New(conf)
	if cw == nil {
		return nil, fmt.Errorf("unable to login to CloudWatch")
	}
	instances, err := awsGetInstances(*ecc)
	if err != nil {
		slog.Info("No EC2 Instances found.")
	}
	loadBalancers, err := awsGetLoadBalancers(*elb)
	if err != nil {
		slog.Info("No ELB Load Balancecrs found.")
	}
	asgs, err := awsGetAutoScaleGroups(*as)
	if err != nil {
		slog.Info("No AutoScaleGroups found.")
	}
	for _, loadBalancer := range loadBalancers {
		awsGetELBLatency(*cw, &md, loadBalancer)
		awsGetELBHostCounts(*cw, &md, loadBalancer)
		awsGetELB2XX(*cw, &md, loadBalancer)
		awsGetELB5XX(*cw, &md, loadBalancer)
	}
	for _, instance := range instances {
		awsGetCPU(*cw, &md, instance)
		awsGetNetwork(*cw, &md, instance)
		awsGetDiskBytes(*cw, &md, instance)
		awsGetDiskOps(*cw, &md, instance)
		awsGetStatusChecks(*cw, &md, instance)
	}
	for _, asg := range asgs {
		awsGetAsgCPU(*cw, &md, asg)
	}
	return md, nil
}
Beispiel #4
0
func migrateTagMetadata(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "metadata")
	if err != nil {
		return err
	}
	if !migrated {
		slog.Info("Migrating metadata to new database format")
		type Metavalue struct {
			Time  time.Time
			Value interface{}
		}
		metadata := make(map[metadata.Metakey]*Metavalue)
		if err := decode(db, "metadata", &metadata); err == nil {
			for k, v := range metadata {
				err = data.Metadata().PutTagMetadata(k.TagSet(), k.Name, fmt.Sprint(v.Value), v.Time)
				if err != nil {
					return err
				}
			}
			err = deleteKey(db, "metadata")
			if err != nil {
				return err
			}
		}
		err = setMigrated(db, "metadata")
		if err != nil {
			return err
		}
	}
	return nil
}
Beispiel #5
0
func migrateIncidents(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "incidents")
	if err != nil {
		return err
	}
	if migrated {
		return nil
	}
	slog.Info("migrating incidents")
	incidents := map[uint64]*models.Incident{}
	if err := decode(db, "incidents", &incidents); err != nil {
		return err
	}
	max := uint64(0)
	for k, v := range incidents {
		data.Incidents().UpdateIncident(k, v)
		if k > max {
			max = k
		}
	}

	if err = data.Incidents().SetMaxId(max); err != nil {
		return err
	}
	if err = setMigrated(db, "incidents"); err != nil {
		return err
	}

	return nil
}
Beispiel #6
0
func migrateNotifications(db *bolt.DB, s *Schedule) error {
	migrated, err := isMigrated(db, "notifications")
	if err != nil {
		return err
	}
	if !migrated {
		slog.Info("Migrating notifications to new database format")
		nots := map[models.AlertKey]map[string]time.Time{}
		err := decode(db, "notifications", &nots)
		if err != nil {
			return err
		}
		for ak, ns := range nots {
			for n, t := range ns {
				not := s.Conf.Notifications[n]
				if not == nil {
					continue
				}
				if err = s.DataAccess.Notifications().InsertNotification(ak, n, t.Add(not.Timeout)); err != nil {
					return nil
				}
			}
		}
		setMigrated(db, "notifications")
	}
	return nil
}
Beispiel #7
0
func (s *Search) backupLoop() {
	for {
		time.Sleep(2 * time.Minute)
		slog.Info("Backing up last data to redis")
		err := s.BackupLast()
		if err != nil {
			slog.Error(err)
		}
	}
}
Beispiel #8
0
func sendBatch(batch []*opentsdb.DataPoint) {
	if Print {
		for _, d := range batch {
			j, err := d.MarshalJSON()
			if err != nil {
				slog.Error(err)
			}
			slog.Info(string(j))
		}
		recordSent(len(batch))
		return
	}

	now := time.Now()
	resp, err := SendDataPoints(batch, tsdbURLs[currentTsdbURL])
	if err == nil {
		defer resp.Body.Close()
	}
	d := time.Since(now).Nanoseconds() / 1e6
	Sample("collect.post.duration", Tags, float64(d))
	Add("collect.post.total_duration", Tags, d)
	Add("collect.post.count", Tags, 1)
	// Some problem with connecting to the server; retry later.
	if err != nil || (resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK) {
		if err != nil {
			Add("collect.post.error", Tags, 1)
			slog.Error(err)
			// Switch endpoint if possible
			currentTsdbURL = (currentTsdbURL + 1) % len(tsdbURLs)
		} else if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
			Add("collect.post.bad_status", Tags, 1)
			slog.Errorln(resp.Status)
			body, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				slog.Error(err)
			}
			if len(body) > 0 {
				slog.Error(string(body))
			}
			// Switch endpoint if possible
			currentTsdbURL = (currentTsdbURL + 1) % len(tsdbURLs)
		}
		restored := 0
		for _, msg := range batch {
			restored++
			tchan <- msg
		}
		d := time.Second * 5
		Add("collect.post.restore", Tags, int64(restored))
		slog.Infof("restored %d, sleeping %s", restored, d)
		time.Sleep(d)
		return
	}
	recordSent(len(batch))
}
Beispiel #9
0
func migrateSearch(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "search")
	if err != nil {
		return err
	}
	if !migrated {
		slog.Info("Migrating Search data to new database format")
		type duple struct{ A, B string }
		type present map[string]int64
		type qmap map[duple]present
		type smap map[string]present

		metric := qmap{}
		if err := decode(db, "metric", &metric); err == nil {
			for k, v := range metric {
				for metric, time := range v {
					data.Search().AddMetricForTag(k.A, k.B, metric, time)
				}
			}
		} else {
			return err
		}
		tagk := smap{}
		if err := decode(db, "tagk", &tagk); err == nil {
			for metric, v := range tagk {
				for tk, time := range v {
					data.Search().AddTagKeyForMetric(metric, tk, time)
				}
				data.Search().AddMetric(metric, time.Now().Unix())
			}
		} else {
			return err
		}

		tagv := qmap{}
		if err := decode(db, "tagv", &tagv); err == nil {
			for k, v := range tagv {
				for val, time := range v {
					data.Search().AddTagValue(k.A, k.B, val, time)
					data.Search().AddTagValue(database.Search_All, k.B, val, time)
				}
			}
		} else {
			return err
		}
		err = setMigrated(db, "search")
		if err != nil {
			return err
		}
	}
	return nil
}
Beispiel #10
0
func migrateMetricMetadata(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "metadata-metric")
	if err != nil {
		return err
	}
	if !migrated {
		slog.Info("Migrating metric metadata to new database format")
		type MetadataMetric struct {
			Unit        string `json:",omitempty"`
			Type        string `json:",omitempty"`
			Description string
		}
		mms := map[string]*MetadataMetric{}
		if err := decode(db, "metadata-metric", &mms); err == nil {
			for name, mm := range mms {
				if mm.Description != "" {
					err = data.Metadata().PutMetricMetadata(name, "desc", mm.Description)
					if err != nil {
						return err
					}
				}
				if mm.Unit != "" {
					err = data.Metadata().PutMetricMetadata(name, "unit", mm.Unit)
					if err != nil {
						return err
					}
				}
				if mm.Type != "" {
					err = data.Metadata().PutMetricMetadata(name, "rate", mm.Type)
					if err != nil {
						return err
					}
				}
			}
			err = setMigrated(db, "metadata-metric")
			if err != nil {
				return err
			}
		}
	}
	return nil
}
Beispiel #11
0
func migrateSilence(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "silence")
	if err != nil {
		return err
	}
	if migrated {
		return nil
	}
	slog.Info("migrating silence")
	silence := map[string]*models.Silence{}
	if err := decode(db, "silence", &silence); err != nil {
		return err
	}
	for _, v := range silence {
		v.TagString = v.Tags.Tags()
		data.Silence().AddSilence(v)
	}
	if err = setMigrated(db, "silence"); err != nil {
		return err
	}
	return nil
}
Beispiel #12
0
func printPut(c chan *opentsdb.DataPoint) {
	for dp := range c {
		b, _ := json.Marshal(dp)
		slog.Info(string(b))
	}
}
Beispiel #13
0
func sendBatch(batch []json.RawMessage) {
	if Print {
		for _, d := range batch {
			slog.Info(string(d))
		}
		recordSent(len(batch))
		return
	}
	var buf bytes.Buffer
	g := gzip.NewWriter(&buf)
	if err := json.NewEncoder(g).Encode(batch); err != nil {
		slog.Error(err)
		return
	}
	if err := g.Close(); err != nil {
		slog.Error(err)
		return
	}
	req, err := http.NewRequest("POST", tsdbURL, &buf)
	if err != nil {
		slog.Error(err)
		return
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Content-Encoding", "gzip")
	now := time.Now()
	resp, err := client.Do(req)
	d := time.Since(now).Nanoseconds() / 1e6
	if err == nil {
		defer resp.Body.Close()
	}
	Add("collect.post.total_duration", Tags, d)
	Add("collect.post.count", Tags, 1)
	// Some problem with connecting to the server; retry later.
	if err != nil || resp.StatusCode != http.StatusNoContent {
		if err != nil {
			Add("collect.post.error", Tags, 1)
			slog.Error(err)
		} else if resp.StatusCode != http.StatusNoContent {
			Add("collect.post.bad_status", Tags, 1)
			slog.Errorln(resp.Status)
			body, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				slog.Error(err)
			}
			if len(body) > 0 {
				slog.Error(string(body))
			}
		}
		restored := 0
		for _, msg := range batch {
			var dp opentsdb.DataPoint
			if err := json.Unmarshal(msg, &dp); err != nil {
				slog.Error(err)
				continue
			}
			restored++
			tchan <- &dp
		}
		d := time.Second * 5
		Add("collect.post.restore", Tags, int64(restored))
		slog.Infof("restored %d, sleeping %s", restored, d)
		time.Sleep(d)
		return
	}
	recordSent(len(batch))
}
Beispiel #14
0
func migrateOldDataToRedis(db *bolt.DB, data database.DataAccess) error {
	// metadata-metric

	migrated, err := isMigrated(db, "metadata-metric")
	if err != nil {
		return err
	}
	if !migrated {
		type MetadataMetric struct {
			Unit        string `json:",omitempty"`
			Type        string `json:",omitempty"`
			Description string
		}
		slog.Info("Migrating metric metadata to new database format")
		mms := map[string]*MetadataMetric{}
		if err := decode(db, "metadata-metric", &mms); err == nil {
			for name, mm := range mms {
				if mm.Description != "" {
					err = data.PutMetricMetadata(name, "desc", mm.Description)
					if err != nil {
						return err
					}
				}
				if mm.Unit != "" {
					err = data.PutMetricMetadata(name, "unit", mm.Unit)
					if err != nil {
						return err
					}
				}
				if mm.Type != "" {
					err = data.PutMetricMetadata(name, "rate", mm.Type)
					if err != nil {
						return err
					}
				}
			}
			err = setMigrated(db, "metadata-metric")
			if err != nil {
				return err
			}
		}
	}
	//metadata
	migrated, err = isMigrated(db, "metadata")
	if err != nil {
		return err
	}
	if !migrated {
		slog.Info("Migrating metadata to new database format")
		type Metavalue struct {
			Time  time.Time
			Value interface{}
		}
		metadata := make(map[metadata.Metakey]*Metavalue)
		if err := decode(db, "metadata", &metadata); err == nil {

			for k, v := range metadata {
				err = data.PutTagMetadata(k.TagSet(), k.Name, fmt.Sprint(v.Value), v.Time)
				if err != nil {
					return err
				}
			}
			err = deleteKey(db, "metadata")
			if err != nil {
				return err
			}
		}
		err = setMigrated(db, "metadata")
		if err != nil {
			return err
		}
	}
	return nil
}
Beispiel #15
0
func migrateState(db *bolt.DB, data database.DataAccess) error {
	migrated, err := isMigrated(db, "state")
	if err != nil {
		return err
	}
	if migrated {
		return nil
	}
	//redefine the structs as they were when we gob encoded them
	type Result struct {
		*expr.Result
		Expr string
	}
	mResult := func(r *Result) *models.Result {
		if r == nil || r.Result == nil {
			return &models.Result{}
		}
		v, _ := valueToFloat(r.Result.Value)
		return &models.Result{
			Computations: r.Result.Computations,
			Value:        models.Float(v),
			Expr:         r.Expr,
		}
	}
	type Event struct {
		Warn, Crit  *Result
		Status      models.Status
		Time        time.Time
		Unevaluated bool
		IncidentId  uint64
	}
	type State struct {
		*Result
		History      []Event
		Actions      []models.Action
		Touched      time.Time
		Alert        string
		Tags         string
		Group        opentsdb.TagSet
		Subject      string
		Body         string
		EmailBody    []byte
		EmailSubject []byte
		Attachments  []*models.Attachment
		NeedAck      bool
		Open         bool
		Forgotten    bool
		Unevaluated  bool
		LastLogTime  time.Time
	}
	type OldStates map[models.AlertKey]*State
	slog.Info("migrating state")
	states := OldStates{}
	if err := decode(db, "status", &states); err != nil {
		return err
	}
	for ak, state := range states {
		if len(state.History) == 0 {
			continue
		}
		var thisId uint64
		events := []Event{}
		addIncident := func(saveBody bool) error {
			if thisId == 0 || len(events) == 0 || state == nil {
				return nil
			}
			incident := NewIncident(ak)
			incident.Expr = state.Expr

			incident.NeedAck = state.NeedAck
			incident.Open = state.Open
			incident.Result = mResult(state.Result)
			incident.Unevaluated = state.Unevaluated
			incident.Start = events[0].Time
			incident.Id = int64(thisId)
			incident.Subject = state.Subject
			if saveBody {
				incident.Body = state.Body
			}
			for _, ev := range events {
				incident.CurrentStatus = ev.Status
				mEvent := models.Event{
					Crit:        mResult(ev.Crit),
					Status:      ev.Status,
					Time:        ev.Time,
					Unevaluated: ev.Unevaluated,
					Warn:        mResult(ev.Warn),
				}
				incident.Events = append(incident.Events, mEvent)
				if ev.Status > incident.WorstStatus {
					incident.WorstStatus = ev.Status
				}
				if ev.Status > models.StNormal {
					incident.LastAbnormalStatus = ev.Status
					incident.LastAbnormalTime = ev.Time.UTC().Unix()
				}
			}
			for _, ac := range state.Actions {
				if ac.Time.Before(incident.Start) {
					continue
				}
				incident.Actions = append(incident.Actions, ac)
				if ac.Time.After(incident.Events[len(incident.Events)-1].Time) && ac.Type == models.ActionClose {
					incident.End = &ac.Time
					break
				}
			}
			if err := data.State().ImportIncidentState(incident); err != nil {
				return err
			}
			return nil
		}
		//essentially a rle algorithm to assign events to incidents
		for _, e := range state.History {
			if e.Status > models.StUnknown {
				continue
			}
			if e.IncidentId == 0 {
				//include all non-assigned incidents up to the next non-match
				events = append(events, e)
				continue
			}
			if thisId == 0 {
				thisId = e.IncidentId
				events = append(events, e)
			}
			if e.IncidentId != thisId {
				if err := addIncident(false); err != nil {
					return err
				}
				thisId = e.IncidentId
				events = []Event{e}

			} else {
				events = append(events, e)
			}
		}
		if err := addIncident(true); err != nil {
			return err
		}
	}
	if err = setMigrated(db, "state"); err != nil {
		return err
	}
	return nil
}