示例#1
0
func DatabaseConnect(c conf.Database) (*sql.DB, error) {
	var db *sql.DB
	var err error

	if c.Type == "mysql" {
		if c.Password == "" {
			db, err = sql.Open("mysql", c.Username+"@"+c.Protocol+"("+c.Address+":"+strconv.Itoa(c.Port)+")/")
		} else {
			db, err = sql.Open("mysql", c.Username+":"+c.Password+"@"+c.Protocol+"("+c.Address+":"+strconv.Itoa(c.Port)+")/")
		}
	}

	if err != nil {
		slog.Errorf("%v: %v: %v: %v", "Database", c.Type, "connect open error", err)
		return nil, fmt.Errorf("%v: %v", "connect open error", err)
	}

	// check if can connect to database, if fail, check again every minute until connected
	for {
		err = db.Ping()
		if err == nil {
			break
		}
		slog.Errorf("%v: %v: %v: %v", "Database", c.Type, "connect ping error", err)
		next := time.After(time.Minute)
		select {
		case <-next:
		case <-ContinuousCollectorVars.quit:
			db.Close()
			return nil, fmt.Errorf("connect while quitting")
		}
	}

	return db, nil
}
示例#2
0
文件: command.go 项目: nicollet/bosun
// Command executes the named program with the given arguments. If it does not
// exit within timeout, it is sent SIGINT (if supported by Go). After
// another timeout, it is killed.
func Command(timeout time.Duration, stdin io.Reader, name string, arg ...string) (io.Reader, error) {
	if _, err := exec.LookPath(name); err != nil {
		return nil, ErrPath
	}
	if Debug {
		slog.Infof("executing command: %v %v", name, arg)
	}
	c := exec.Command(name, arg...)
	b := &bytes.Buffer{}
	c.Stdout = b
	c.Stdin = stdin
	if err := c.Start(); err != nil {
		return nil, err
	}
	timedOut := false
	intTimer := time.AfterFunc(timeout, func() {
		slog.Errorf("Process taking too long. Interrupting: %s %s", name, strings.Join(arg, " "))
		c.Process.Signal(os.Interrupt)
		timedOut = true
	})
	killTimer := time.AfterFunc(timeout*2, func() {
		slog.Errorf("Process taking too long. Killing: %s %s", name, strings.Join(arg, " "))
		c.Process.Signal(os.Kill)
		timedOut = true
	})
	err := c.Wait()
	intTimer.Stop()
	killTimer.Stop()
	if timedOut {
		return nil, ErrTimeout
	}
	return b, err
}
示例#3
0
文件: bolt.go 项目: youngl98/bosun
func (s *Schedule) save() {
	if s.db == nil {
		return
	}
	s.Lock("Save")
	store := map[string]interface{}{
		dbMetric:        s.Search.Read.Metric,
		dbTagk:          s.Search.Read.Tagk,
		dbTagv:          s.Search.Read.Tagv,
		dbMetricTags:    s.Search.Read.MetricTags,
		dbNotifications: s.Notifications,
		dbSilence:       s.Silence,
		dbStatus:        s.status,
		dbMetadata:      s.Metadata,
		dbIncidents:     s.Incidents,
	}
	tostore := make(map[string][]byte)
	for name, data := range store {
		f := new(bytes.Buffer)
		gz := gzip.NewWriter(f)
		cw := &counterWriter{w: gz}
		enc := gob.NewEncoder(cw)
		if err := enc.Encode(data); err != nil {
			slog.Errorf("error saving %s: %v", name, err)
			s.Unlock()
			return
		}
		if err := gz.Flush(); err != nil {
			slog.Errorf("gzip flush error saving %s: %v", name, err)
		}
		if err := gz.Close(); err != nil {
			slog.Errorf("gzip close error saving %s: %v", name, err)
		}
		tostore[name] = f.Bytes()
		slog.Infof("wrote %s: %v", name, conf.ByteSize(cw.written))
		collect.Put("statefile.size", opentsdb.TagSet{"object": name}, cw.written)
	}
	s.Unlock()
	err := s.db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte(dbBucket))
		if err != nil {
			return err
		}
		for name, data := range tostore {
			if err := b.Put([]byte(name), data); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		slog.Errorf("save db update error: %v", err)
		return
	}
	fi, err := os.Stat(s.Conf.StateFile)
	if err == nil {
		collect.Put("statefile.size", opentsdb.TagSet{"object": "total"}, fi.Size())
	}
	slog.Infoln("save to db complete")
}
示例#4
0
// structProcessor.add() takes in a metric name prefix, an arbitrary struct, and a tagset.
// The processor recurses through the struct and builds metrics. The field tags direct how
// the field should be processed, as well as the metadata for the resulting metric.
//
// The field tags used are described as follows:
//
// version: typically set to '1' or '2'.
//	This is compared against the elastic cluster version. If the version from the tag
//      does not match the version in production, the metric will not be sent for this field.
//
// exclude:
//      If this tag is set to 'true', a metric will not be sent for this field.
//
// rate: one of 'gauge', 'counter', 'rate'
//	This tag dictates the metadata.RateType we send.
//
// unit: 'bytes', 'pages', etc
//	This tag dictates the metadata.Unit we send.
//
// metric:
//      This is the metric name which will be sent. If not present, the 'json'
//      tag is sent as the metric name.
//
// Special handling:
//
// Metrics having the json tag suffix of 'in_milliseconds' are automagically
// divided by 1000 and sent as seconds. The suffix is stripped from the name.
//
// Metrics having the json tag suffix of 'in_bytes' are automatically sent as
// gauge bytes. The suffix is stripped from the metric name.
func (s *structProcessor) add(prefix string, st interface{}, ts opentsdb.TagSet) {
	t := reflect.TypeOf(st)
	valueOf := reflect.ValueOf(st)
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		value := valueOf.Field(i).Interface()
		if field.Tag.Get("exclude") == "true" {
			continue
		}
		var (
			jsonTag    = field.Tag.Get("json")
			metricTag  = field.Tag.Get("metric")
			versionTag = field.Tag.Get("version")
			rateTag    = field.Tag.Get("rate")
			unitTag    = field.Tag.Get("unit")
		)
		metricName := jsonTag
		if metricTag != "" {
			metricName = metricTag
		}
		if metricName == "" {
			slog.Errorf("Unable to determine metric name for field %s. Skipping.", field.Name)
			continue
		}
		if versionTag == "" || strings.HasPrefix(s.elasticVersion, versionTag) {
			switch value := value.(type) {
			case int, float64: // Number types in our structs are only ints and float64s.
				// Turn all millisecond metrics into seconds
				if strings.HasSuffix(metricName, "_in_millis") {
					switch value.(type) {
					case int:
						value = float64(value.(int)) / 1000
					case float64:
						value = value.(float64) / 1000
					}
					unitTag = "seconds"
					metricName = strings.TrimSuffix(metricName, "_in_millis")
				}
				// Set rate and unit for all "_in_bytes" metrics, and strip the "_in_bytes"
				if strings.HasSuffix(metricName, "_in_bytes") {
					if rateTag == "" {
						rateTag = "gauge"
					}
					unitTag = "bytes"
					metricName = strings.TrimSuffix(metricName, "_in_bytes")
				}
				Add(s.md, prefix+"."+metricName, value, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), field.Tag.Get("desc"))
			case string:
				// The json data has a lot of strings, and we don't care about em.
			default:
				// If we hit another struct, recurse
				if reflect.ValueOf(value).Kind() == reflect.Struct {
					s.add(prefix+"."+metricName, value, ts)
				} else {
					slog.Errorf("Field %s for metric %s is non-numeric type. Cannot record as a metric.\n", field.Name, prefix+"."+metricName)
				}
			}
		}
	}
}
示例#5
0
func runService(name string, isDebug bool) {
	errFix := fixEventMessageFile(name) //Temp fix. Remove after a few weeks.
	if errFix != nil {
		slog.Errorf("%s fixEventMessageFile failed: %v", name, errFix)
		return
	}
	if isDebug {
		slog.SetEventLog(debug.New(name), 1)
	} else {
		elog, err := eventlog.Open(name)
		if err != nil {
			return
		}
		slog.SetEventLog(elog, 1)
		defer elog.Close()
	}
	slog.Infof("starting %s service version %v (%v)", name, version.Version, version.VersionSHA)
	run := svc.Run
	if isDebug {
		run = debug.Run
	}
	err := run(name, &s{})
	if err != nil {
		slog.Errorf("%s service failed: %v", name, err)
		return
	}
	slog.Infof("%s service stopped", name)
	os.Exit(0)
}
示例#6
0
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	conn, err := redis.Dial("tcp", server, redis.DialDatabase(db))
	if err != nil {
		return md, err
	}
	defer conn.Close()
	if _, err := conn.Do("CLIENT", "SETNAME", "scollector"); err != nil {
		return md, err
	}
	cursor := 0
	for {
		vals, err := redis.Values(conn.Do("HSCAN", collect.RedisCountersKey, cursor))
		if err != nil {
			return md, err
		}
		if len(vals) != 2 {
			return md, fmt.Errorf("Unexpected number of values")
		}
		cursor, err = redis.Int(vals[0], nil)
		if err != nil {
			return md, err
		}
		pairs, err := redis.StringMap(vals[1], nil)
		if err != nil {
			return md, err
		}
		for mts, val := range pairs {
			parts := strings.Split(mts, ":")
			if len(parts) != 2 {
				slog.Errorf("Invalid metric tag set counter: %s", mts)
				continue
			}
			metric := parts[0]
			tags, err := opentsdb.ParseTags(parts[1])
			if err != nil {
				slog.Errorf("Invalid tags: %s", parts[1])
				continue
			}
			v, err := strconv.Atoi(val)
			if err != nil {
				slog.Errorf("Invalid counter value: %s", val)
				continue
			}
			Add(&md, metric, v, tags, metadata.Counter, metadata.Count, "")
		}
		if cursor == 0 {
			break
		}
	}
	return md, nil
}
示例#7
0
func (s *Schedule) PutMetadata(k metadata.Metakey, v interface{}) {

	isCoreMeta := (k.Name == "desc" || k.Name == "unit" || k.Name == "rate")
	if !isCoreMeta {
		s.metaLock.Lock()
		s.Metadata[k] = &Metavalue{time.Now().UTC(), v}
		s.metaLock.Unlock()
		return
	}
	if k.Metric == "" {
		slog.Error("desc, rate, and unit require metric name")
		return
	}
	strVal, ok := v.(string)
	if !ok {
		slog.Errorf("desc, rate, and unit require value to be string. Found: %s", reflect.TypeOf(v))
		return
	}
	s.metricMetaLock.Lock()
	metricData, ok := s.metricMetadata[k.Metric]
	if !ok {
		metricData = &MetadataMetric{}
		s.metricMetadata[k.Metric] = metricData
	}
	switch k.Name {
	case "desc":
		metricData.Description = strVal
	case "unit":
		metricData.Unit = strVal
	case "rate":
		metricData.Type = strVal
	}
	s.metricMetaLock.Unlock()
}
示例#8
0
文件: funcs.go 项目: jareksm/bosun
func timeTSDBRequest(e *State, T miniprofiler.Timer, req *opentsdb.Request) (s opentsdb.ResponseSet, err error) {
	e.tsdbQueries = append(e.tsdbQueries, *req)
	if e.autods > 0 {
		for _, q := range req.Queries {
			if q.Downsample == "" {
				if err := req.AutoDownsample(e.autods); err != nil {
					return nil, err
				}
			}
		}
	}
	b, _ := json.MarshalIndent(req, "", "  ")
	tries := 1
	for {
		T.StepCustomTiming("tsdb", "query", string(b), func() {
			getFn := func() (interface{}, error) {
				return e.tsdbContext.Query(req)
			}
			var val interface{}
			val, err = e.cache.Get(string(b), getFn)
			s = val.(opentsdb.ResponseSet).Copy()

		})
		if err == nil || tries == tsdbMaxTries {
			break
		}
		slog.Errorf("Error on tsdb query %d: %s", tries, err.Error())
		tries++
	}
	return
}
示例#9
0
文件: check.go 项目: Skyscanner/bosun
func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert) {
	slog.Infof("check alert %v start", a.Name)
	start := utcNow()
	for _, ak := range s.findUnknownAlerts(r.Start, a.Name) {
		r.Events[ak] = &models.Event{Status: models.StUnknown}
	}
	var warns, crits models.AlertKeys
	d, err := s.executeExpr(T, r, a, a.Depends)
	var deps expr.ResultSlice
	if err == nil {
		deps = filterDependencyResults(d)
		crits, err = s.CheckExpr(T, r, a, a.Crit, models.StCritical, nil)
		if err == nil {
			warns, err = s.CheckExpr(T, r, a, a.Warn, models.StWarning, crits)
		}
	}
	unevalCount, unknownCount := markDependenciesUnevaluated(r.Events, deps, a.Name)
	if err != nil {
		slog.Errorf("Error checking alert %s: %s", a.Name, err.Error())
		removeUnknownEvents(r.Events, a.Name)
		s.markAlertError(a.Name, err)
	} else {
		s.markAlertSuccessful(a.Name)
	}
	collect.Put("check.duration", opentsdb.TagSet{"name": a.Name}, time.Since(start).Seconds())
	slog.Infof("check alert %v done (%s): %v crits, %v warns, %v unevaluated, %v unknown", a.Name, time.Since(start), len(crits), len(warns), unevalCount, unknownCount)
}
示例#10
0
func init() {
	registerInit(func(c *conf.Conf) {
		for _, filter := range c.ElasticIndexFilters {
			err := AddElasticIndexFilter(filter)
			if err != nil {
				slog.Errorf("Error processing ElasticIndexFilter: %s", err)
			}
		}
		collectors = append(collectors, &IntervalCollector{
			F: func() (opentsdb.MultiDataPoint, error) {
				return c_elasticsearch(false)
			},
			name:   "elasticsearch",
			Enable: enableURL("http://localhost:9200/"),
		})
		collectors = append(collectors, &IntervalCollector{
			F: func() (opentsdb.MultiDataPoint, error) {
				return c_elasticsearch(true)
			},
			name:     "elasticsearch-indices",
			Interval: time.Minute * 15,
			Enable:   enableURL("http://localhost:9200/"),
		})
	})
}
示例#11
0
文件: check.go 项目: nicollet/bosun
func (s *Schedule) findUnknownAlerts(now time.Time, alert string) []models.AlertKey {
	keys := []models.AlertKey{}
	if utcNow().Sub(bosunStartupTime) < s.SystemConf.GetCheckFrequency() {
		return keys
	}
	if !s.AlertSuccessful(alert) {
		return keys
	}
	a := s.RuleConf.GetAlert(alert)
	t := a.Unknown
	if t == 0 {
		runEvery := s.SystemConf.GetDefaultRunEvery()
		if a.RunEvery != 0 {
			runEvery = a.RunEvery
		}
		t = s.SystemConf.GetCheckFrequency() * 2 * time.Duration(runEvery)
	}
	maxTouched := now.UTC().Unix() - int64(t.Seconds())
	untouched, err := s.DataAccess.State().GetUntouchedSince(alert, maxTouched)
	if err != nil {
		slog.Errorf("Error finding unknown alerts for alert %s: %s.", alert, err)
		return keys
	}
	for _, ak := range untouched {
		if a.Squelch.Squelched(ak.Group()) {
			continue
		}
		keys = append(keys, ak)
	}
	return keys
}
示例#12
0
文件: fastly.go 项目: nicollet/bosun
func c_fastly(c fastlyClient) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	to := time.Now().UTC().Truncate(time.Minute)
	from := to.Add(-15 * time.Minute) // "Minutely data will be delayed by roughly 10 to 15 minutes from the current time -- Fastly Docs"

	// Aggregate
	statsCollection, err := c.GetAggregateStats(from, to)
	if err != nil {
		return md, err
	}
	for _, stats := range statsCollection {
		fastlyReflectAdd(&md, "fastly", "", stats, stats.StartTime, nil)
	}

	// By Service
	services, err := c.GetServices()
	if err != nil {
		return md, err
	}
	for _, service := range services {
		statsCollection, err := c.GetServiceStats(from, to, service.Id)
		if err != nil {
			slog.Errorf("couldn't get stats for service %v with id %v: %v", service.Name, service.Id, err)
			continue
		}
		for _, stats := range statsCollection {
			fastlyReflectAdd(&md, "fastly", "_by_service", stats, stats.StartTime, service.TagSet())
		}
	}

	// By Region
	regions, err := c.GetRegions()
	if err != nil {
		return md, err
	}
	for _, region := range regions {
		statsCollection, err := c.GetRegionStats(from, to, region)
		if err != nil {
			slog.Errorf("couldn't get stats for region %v: %v", region, err)
			continue
		}
		for _, stats := range statsCollection {
			fastlyReflectAdd(&md, "fastly", "_by_region", stats, stats.StartTime, region.TagSet())
		}
	}
	return md, nil
}
示例#13
0
func incrementRedisCounter(data []byte, addr string, conn redis.Conn) {
	if len(data) < 5 {
		slog.Errorf("Insufficient data for increment from %s.", addr)
		return
	}
	r := bytes.NewReader(data)
	var i int32
	err := binary.Read(r, binary.BigEndian, &i)
	if err != nil {
		slog.Error(err)
		return
	}
	mts := string(data[4:])
	if _, err = conn.Do("HINCRBY", RedisCountersKey, mts, i); err != nil {
		slog.Errorf("Error incrementing counter %s by %d. From %s. %s", mts, i, addr, err)
	}
}
示例#14
0
文件: check.go 项目: Skyscanner/bosun
func (s *Schedule) GetUnknownAndUnevaluatedAlertKeys(alert string) (unknown, uneval []models.AlertKey) {
	unknown, uneval, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(alert)
	if err != nil {
		slog.Errorf("Error getting unknown/unevaluated alert keys: %s", err)
		return nil, nil
	}
	return unknown, uneval
}
示例#15
0
文件: snmp.go 项目: eswdd/bosun
// snmp_subtree takes an oid and returns all data exactly one level below it. It
// produces an error if there is more than one level below.
func snmp_subtree(host, community, oid string) (map[string]interface{}, error) {
	rows, err := snmp.Walk(host, community, oid)
	if err != nil {
		return nil, err
	}
	m := make(map[string]interface{})
	for rows.Next() {
		key := ""
		var a interface{}
		switch oid {
		case ifHCInBroadcastPkts:
			a = new(big.Int)
			id, err := rows.Scan(&a)
			if err != nil {
				slog.Errorf("Error scanning oid %v on host %v: %v", oid, host, err)
				continue
			}
			switch t := id.(type) {
			case int:
				key = fmt.Sprint(t)
			default:
				return nil, fmt.Errorf("snmp subtree: only one level allowed")
			}
		default:
			id, err := rows.Scan(&a)
			if err != nil {
				slog.Errorf("Error scanning oid %v on host %v: %v", oid, host, err)
				continue
			}
			switch t := id.(type) {
			case int:
				key = fmt.Sprint(t)
			case []int:
				key = snmpOidArrayToString(t)
			default:
				return nil, fmt.Errorf("Unknown key type: %s", reflect.TypeOf(id).String())
			}
		}
		m[key] = a
	}
	if err := rows.Err(); err != nil && err != io.EOF {
		return nil, err
	}
	return m, nil
}
示例#16
0
文件: fastly.go 项目: nicollet/bosun
func fastlyReflectAdd(md *opentsdb.MultiDataPoint, prefix, suffix string, st interface{}, timeStamp int64, ts opentsdb.TagSet) {
	t := reflect.TypeOf(st)
	valueOf := reflect.ValueOf(st)
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		value := valueOf.Field(i).Interface()
		var (
			jsonTag   = field.Tag.Get("json")
			metricTag = field.Tag.Get("metric")
			rateTag   = field.Tag.Get("rate")
			unitTag   = field.Tag.Get("unit")
			divTag    = field.Tag.Get("div")
			descTag   = field.Tag.Get("desc")
			exclude   = field.Tag.Get("exclude") != ""
		)
		if exclude || descTag == "" {
			continue
		}
		metricName := jsonTag
		if metricTag != "" {
			metricName = metricTag
		}
		if metricName == "" {
			slog.Errorf("Unable to determine metric name for field %s. Skipping.", field.Name)
			continue
		}
		shouldDiv := divTag != ""
		if shouldDiv {
			descTag = fmt.Sprintf("%v %v", descTag, fastlyDivDesc)
		}
		fullMetric := fmt.Sprintf("%v.%v%v", prefix, metricName, suffix)
		switch value := value.(type) {
		case int64, float64:
			var v float64
			if f, found := value.(float64); found {
				v = f
			} else {
				v = float64(value.(int64))
			}
			if shouldDiv {
				v /= 60.0
			}
			AddTS(md, fullMetric, timeStamp, v, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), descTag)
		case string:
			// Floats in strings, I know not why, precision perhaps?
			// err ignored since we expect non number strings in the struct
			if f, err := strconv.ParseFloat(value, 64); err != nil {
				if shouldDiv {
					f /= 60.0
				}
				AddTS(md, fullMetric, timeStamp, f, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), descTag)
			}
		default:
			// Pass since there is no need to recurse
		}
	}
}
示例#17
0
文件: snmp_cisco.go 项目: eswdd/bosun
func c_cisco_nxos(host, community string, cpuIntegrator tsIntegrator) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	ts := opentsdb.TagSet{"host": host}
	// CPU
	if err := ciscoCPU(host, community, ts, cpuIntegrator, &md); err != nil {
		return md, err
	}
	// Memory
	memRaw, err := snmp_subtree(host, community, ciscoBaseOID+cpmCPUTotalEntry)
	if err != nil {
		return md, fmt.Errorf("failed to get cpmCPUTotalEntry (for memory) for host %v: %v", host, err)
	}
	var usedMem, freeMem, totalMem int64
	var usedOk, freeOk bool
	for id, value := range memRaw {
		var v int64
		switch id {
		case "12.1":
			if v, usedOk = value.(int64); usedOk {
				usedMem = v * 2 << 9 // KiB to Bytes
				totalMem += usedMem
				Add(&md, osMemUsed, usedMem, ts, metadata.Gauge, metadata.Bytes, osMemUsedDesc)
			} else {
				slog.Errorf("failed to convert used memory %v to int64 for host %v", value, host)
			}
		case "13.1":
			if v, freeOk = value.(int64); freeOk {
				freeMem = v * 2 << 9
				totalMem += freeMem
				Add(&md, osMemFree, freeMem, ts, metadata.Gauge, metadata.Bytes, osMemFreeDesc)
			} else {
				slog.Errorf("failed to convert free memory %v to int64 for host %v", value, host)
			}
		}
	}
	if usedOk && freeOk {
		Add(&md, osMemTotal, totalMem, ts, metadata.Gauge, metadata.Bytes, osMemTotalDesc)
		Add(&md, osMemPctFree, int64(float64(freeMem)/float64(totalMem)*100), ts, metadata.Gauge, metadata.Pct, osMemPctFreeDesc)
	} else {
		slog.Errorf("failed to get both free and used memory for host %v", host)
	}
	return md, nil
}
示例#18
0
文件: check.go 项目: nicollet/bosun
func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert) (cancelled bool) {
	slog.Infof("check alert %v start", a.Name)
	start := utcNow()
	for _, ak := range s.findUnknownAlerts(r.Start, a.Name) {
		r.Events[ak] = &models.Event{Status: models.StUnknown}
	}
	var warns, crits models.AlertKeys
	type res struct {
		results *expr.Results
		error   error
	}
	// buffered channel so go func that runs executeExpr won't leak if the Check is cancelled
	// by the closing of the schedule
	rc := make(chan res, 1)
	var d *expr.Results
	var err error
	go func() {
		d, err := s.executeExpr(T, r, a, a.Depends)
		rc <- res{d, err} // this will hang forever if the channel isn't buffered since nothing will ever receieve from rc
	}()
	select {
	case res := <-rc:
		d = res.results
		err = res.error
	// If the schedule closes before the expression has finised executing, we abandon the
	// execution of the expression
	case <-s.runnerContext.Done():
		return true
	}
	var deps expr.ResultSlice
	if err == nil {
		deps = filterDependencyResults(d)
		crits, err, cancelled = s.CheckExpr(T, r, a, a.Crit, models.StCritical, nil)
		if err == nil && !cancelled {
			warns, err, cancelled = s.CheckExpr(T, r, a, a.Warn, models.StWarning, crits)
		}
	}
	if cancelled {
		return true
	}
	unevalCount, unknownCount := markDependenciesUnevaluated(r.Events, deps, a.Name)
	if err != nil {
		slog.Errorf("Error checking alert %s: %s", a.Name, err.Error())
		removeUnknownEvents(r.Events, a.Name)
		s.markAlertError(a.Name, err)
	} else {
		s.markAlertSuccessful(a.Name)
	}
	collect.Put("check.duration", opentsdb.TagSet{"name": a.Name}, time.Since(start).Seconds())
	slog.Infof("check alert %v done (%s): %v crits, %v warns, %v unevaluated, %v unknown", a.Name, time.Since(start), len(crits), len(warns), unevalCount, unknownCount)
	return false
}
示例#19
0
// AddTS is the same as Add but lets you specify the timestamp
func AddTS(md *opentsdb.MultiDataPoint, name string, ts int64, value interface{}, t opentsdb.TagSet, rate metadata.RateType, unit metadata.Unit, desc string) {
	// Check if we really want that metric
	if skipMetric(name) {
		return
	}

	tags := t.Copy()
	if host, present := tags["host"]; !present {
		tags["host"] = util.Hostname
	} else if host == "" {
		delete(tags, "host")
	}
	// if tags are not cleanable, log a message and skip it
	if err := tags.Clean(); err != nil {
		line := ""
		//attempt to log where Add was called from
		if _, filename, l, ok := runtime.Caller(1); ok {
			if filepath.Base(filename) == "collectors.go" {
				_, filename, l, ok = runtime.Caller(2)
			}
			if ok {
				line = fmt.Sprintf("%s:%d", filepath.Base(filename), l)
			}
		}
		slog.Errorf("Invalid tagset discovered: %s. Skipping datapoint. Added from: %s", tags.String(), line)
		return
	}
	if rate != metadata.Unknown {
		metadata.AddMeta(name, nil, "rate", rate, false)
	}
	if unit != metadata.None {
		metadata.AddMeta(name, nil, "unit", unit, false)
	}
	if desc != "" {
		metadata.AddMeta(name, tags, "desc", desc, false)
	}
	tags = AddTags.Copy().Merge(tags)
	if b, ok := value.(bool); ok {
		if b {
			value = 1
		} else {
			value = 0
		}
	}
	d := opentsdb.DataPoint{
		Metric:    name,
		Timestamp: ts,
		Value:     value,
		Tags:      tags,
	}
	*md = append(*md, &d)
}
示例#20
0
文件: disk_linux.go 项目: eswdd/bosun
func isPseudoFS(name string) (res bool) {
	err := readLine("/proc/filesystems", func(s string) error {
		ss := strings.Split(s, "\t")
		if len(ss) == 2 && ss[1] == name && ss[0] == "nodev" {
			res = true
		}
		return nil
	})
	if err != nil {
		slog.Errorf("can not read '/proc/filesystems': %v", err)
	}
	return
}
示例#21
0
func isPseudoFS(name string) (res bool) {
	err := readLine("/proc/filesystems", func(s string) error {
		if strings.Contains(s, name) && strings.Contains(s, "nodev") {
			res = true
			return nil
		}
		return nil
	})
	if err != nil {
		slog.Errorf("can not read '/proc/filesystems': %v", err)
	}
	return
}
示例#22
0
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	conn, err := redis.Dial("tcp", server, redis.DialDatabase(db))
	if err != nil {
		return md, slog.Wrap(err)
	}
	defer conn.Close()

	//do a dance to detect proper hscan command for ledis or redis
	hscanCmd := "XHSCAN"
	info, err := redis.String(conn.Do("info", "server"))
	if err != nil {
		return md, slog.Wrap(err)
	}
	if strings.Contains(info, "redis_version") {
		hscanCmd = "HSCAN"
	}

	cursor := "0"
	for {
		vals, err := redis.Values(conn.Do(hscanCmd, collect.RedisCountersKey, cursor))
		if err != nil {
			return md, slog.Wrap(err)
		}
		if len(vals) != 2 {
			return md, fmt.Errorf("Unexpected number of values")
		}
		cursor, err = redis.String(vals[0], nil)
		if err != nil {
			return md, slog.Wrap(err)
		}
		pairs, err := redis.StringMap(vals[1], nil)
		if err != nil {
			return md, slog.Wrap(err)
		}
		for key, val := range pairs {
			ak := models.AlertKey(key)

			v, err := strconv.Atoi(val)
			if err != nil {
				slog.Errorf("Invalid counter value: %s", val)
				continue
			}
			Add(&md, ak.Name(), v, ak.Group(), metadata.Counter, metadata.Count, "")
		}
		if cursor == "" || cursor == "0" {
			break
		}
	}
	return md, nil
}
示例#23
0
文件: snmp_ips.go 项目: eswdd/bosun
func c_snmp_ips(community, host string) (opentsdb.MultiDataPoint, error) {
	ifIPAdEntAddrRaw, err := snmp_subtree(host, community, ifIPAdEntAddr)
	if err != nil {
		return nil, err
	}
	ipAdEnts := make(map[string]*ipAdEntAddr)
	for id, value := range ifIPAdEntAddrRaw {
		// Split entry type id from ip address
		sp := strings.SplitN(id, ".", 2)
		if len(sp) != 2 {
			slog.Errorln("unexpected length of snmp resonse")
		}
		typeId := sp[0]
		address := sp[1]
		if _, ok := ipAdEnts[address]; !ok {
			ipAdEnts[address] = &ipAdEntAddr{}
		}
		switch typeId {
		case "1":
			if v, ok := value.([]byte); ok {
				ipAdEnts[address].IP = v
			}
		case "2":
			if v, ok := value.(int64); ok {
				ipAdEnts[address].InterfaceId = v
			}
		case "3":
			if v, ok := value.([]byte); ok {
				ipAdEnts[address].Mask = v
			}
		}
	}
	ipsByInt := make(map[int64][]net.IPNet)
	for _, ipNet := range ipAdEnts {
		ipsByInt[ipNet.InterfaceId] = append(ipsByInt[ipNet.InterfaceId], ipNet.IPNet)
	}
	for intId, ipNets := range ipsByInt {
		var ips []string
		for _, ipNet := range ipNets {
			ips = append(ips, ipNet.String())
		}
		sort.Strings(ips)
		j, err := json.Marshal(ips)
		if err != nil {
			slog.Errorf("error marshaling ips for host %v: %v", host, err)
		}
		metadata.AddMeta("", opentsdb.TagSet{"host": host, "iface": fmt.Sprintf("%v", intId)}, "addresses", string(j), false)
	}
	return nil, nil
}
示例#24
0
/*
 Listen on the specified udp port for events.
 This provides long term aggrigation for sparse events.
 wire format: opcode(1 byte) | data

Opcodes:
 1: increment - increments a redis counter for the specified metric/tag set
     data: count(4 bytes signed int) | metric:tag1=foo,tag2=bar
*/
func ListenUdp(port int, redisHost string, redisDb int) error {
	addr := net.UDPAddr{
		Port: port,
		IP:   net.ParseIP("127.0.0.1"),
	}
	conn, err := net.ListenUDP("udp", &addr)
	if err != nil {
		return err
	}
	pool := newRedisPool(redisHost, redisDb)
	for {
		buf := make([]byte, 1025)
		n, addr, err := conn.ReadFromUDP(buf)
		if err != nil {
			slog.Error(err)
			continue
		}
		if n == len(buf) { // if we get a full buffer, assume some was truncated.
			slog.Errorf("Too large a udp packet received from: %s. Skipping.", addr.String())
			continue
		}
		Add("udp.packets", opentsdb.TagSet{}, 1)
		go func(data []byte, addr string) {
			c := pool.Get()
			defer c.Close()
			if len(data) == 0 {
				slog.Errorf("Empty packet received from %s.", addr)
			}
			switch data[0] {
			case 1:
				incrementRedisCounter(data[1:], addr, c)
			default:
				slog.Errorf("Unknown opcode %d from %s.", data[0], addr)
			}
		}(buf[:n], addr.String())
	}
}
示例#25
0
func DatabaseCollect(c conf.Database, collectorStatsChan chan<- *ContinuousCollectorStats) {
	queries := DatabaseGetQueries(c)

	if len(queries) < 1 {
		slog.Errorf("%v: %v: %v", "Database", c.Type, "no queries")
	}

	db, err := DatabaseConnect(c)
	if err != nil {
		return
	}

	defer db.Close()

	var waitGroup sync.WaitGroup
	queryChan := make(chan *DatabaseQuery, 1)

	waitGroup.Add(c.MaxOpenConns)
	for i := 0; i < c.MaxOpenConns; i++ {
		go func() {
			DatabaseRunQueryWorker(c, db, queryChan, collectorStatsChan)
			waitGroup.Done()
		}()
	}

Loop:
	for {
		nextTime := queries[0].next
		// maybe change to a priority queue instead of checking each slice?
		for i := range queries {
			if time.Now().After(queries[i].next) {
				queryChan <- &queries[i]
				queries[i].next = time.Now().Add(queries[i].interval)
				if queries[i].next.Before(nextTime) {
					nextTime = queries[i].next
				}
			}
		}
		next := time.After(nextTime.Sub(time.Now()))
		select {
		case <-next:
		case <-ContinuousCollectorVars.quit:
			break Loop
		}
	}

	close(queryChan)
	waitGroup.Wait()
}
示例#26
0
func c_cadvisor(c *client.Client, config *conf.Cadvisor) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint

	containers, err := c.AllDockerContainers(&v1.ContainerInfoRequest{NumStats: 1})
	if err != nil {
		slog.Errorf("Error fetching containers from cadvisor: %v", err)
		return md, err
	}

	for _, container := range containers {
		statsForContainer(&md, &container, config)
	}

	return md, nil
}
示例#27
0
func (c *IntervalCollector) Run(dpchan chan<- *opentsdb.DataPoint, quit <-chan struct{}) {
	if c.Enable != nil {
		go func() {
			for {
				next := time.After(time.Minute * 5)
				c.Lock()
				c.enabled = c.Enable()
				c.Unlock()
				<-next
			}
		}()
	}

	for {
		interval := c.Interval
		if interval == 0 {
			interval = DefaultFreq
		}
		next := time.After(interval)
		if c.Enabled() {
			timeStart := time.Now()
			md, err := c.F()
			timeFinish := time.Since(timeStart)
			result := 0
			if err != nil {
				slog.Errorf("%v: %v", c.Name(), err)
				result = 1
			}
			if !collect.DisableDefaultCollectors {
				tags := opentsdb.TagSet{"collector": c.Name(), "os": runtime.GOOS}
				Add(&md, "scollector.collector.duration", timeFinish.Seconds(), tags, metadata.Gauge, metadata.Second, "Duration in seconds for each collector run.")
				Add(&md, "scollector.collector.error", result, tags, metadata.Gauge, metadata.Ok, "Status of collector run. 1=Error, 0=Success.")
			}
			for _, dp := range md {
				c.ApplyTagOverrides(dp.Tags)
				dpchan <- dp
			}
		}
		select {
		case <-next:
		case <-quit:
			return
		}

	}
}
示例#28
0
文件: check.go 项目: Skyscanner/bosun
// RunHistory processes an event history and triggers notifications if needed.
func (s *Schedule) RunHistory(r *RunHistory) {
	checkNotify := false
	silenced := s.Silenced()
	for ak, event := range r.Events {
		shouldNotify, err := s.runHistory(r, ak, event, silenced)
		checkNotify = checkNotify || shouldNotify
		if err != nil {
			slog.Errorf("Error in runHistory for %s. %s.", ak, err)
		}
	}
	if checkNotify && s.nc != nil {
		select {
		case s.nc <- true:
		default:
		}
	}
}
示例#29
0
func checkMdadmLinux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint

	volumes, err := filepath.Glob("/dev/md*")
	if err != nil {
		return md, err
	}
	for _, volume := range filterVolumes(volumes) {
		detail, err := examineMdadmVolume(volume)
		if err != nil {
			slog.Errorf("mdadm: can't parse %s data, %s", volume, err)
			continue
		}
		addMdadmMetric(&md, volume, detail)
	}
	return md, nil
}
示例#30
0
func (m *s) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
	const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
	changes <- svc.Status{State: svc.StartPending}
	changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}
loop:
	for c := range r {
		switch c.Cmd {
		case svc.Interrogate:
			changes <- c.CurrentStatus
		case svc.Stop, svc.Shutdown:
			break loop
		default:
			slog.Errorf("unexpected control request #%d", c)
		}
	}
	changes <- svc.Status{State: svc.StopPending}
	return
}