func DatabaseConnect(c conf.Database) (*sql.DB, error) {
	var db *sql.DB
	var err error
	if c.Type == "mysql" {
		if c.Password == "" {
			db, err = sql.Open("mysql", c.Username+"@"+c.Protocol+"("+c.Address+":"+strconv.Itoa(c.Port)+")/")
		} else {
			db, err = sql.Open("mysql", c.Username+":"+c.Password+"@"+c.Protocol+"("+c.Address+":"+strconv.Itoa(c.Port)+")/")
		}
	}
	if err != nil {
		slog.Errorf("%v: %v: %v: %v", "Database", c.Type, "connect open error", err)
		return nil, fmt.Errorf("%v: %v", "connect open error", err)
	}
	// Guard against unsupported database types so the ping loop below does not
	// dereference a nil *sql.DB.
	if db == nil {
		return nil, fmt.Errorf("unsupported database type: %v", c.Type)
	}
	// check if we can connect to the database; if it fails, check again every minute until connected
	for {
		err = db.Ping()
		if err == nil {
			break
		}
		slog.Errorf("%v: %v: %v: %v", "Database", c.Type, "connect ping error", err)
		next := time.After(time.Minute)
		select {
		case <-next:
		case <-ContinuousCollectorVars.quit:
			db.Close()
			return nil, fmt.Errorf("connect while quitting")
		}
	}
	return db, nil
}
// Command executes the named program with the given arguments. If it does not
// exit within timeout, it is sent SIGINT (if supported by Go). After
// another timeout, it is killed.
func Command(timeout time.Duration, stdin io.Reader, name string, arg ...string) (io.Reader, error) {
	if _, err := exec.LookPath(name); err != nil {
		return nil, ErrPath
	}
	if Debug {
		slog.Infof("executing command: %v %v", name, arg)
	}
	c := exec.Command(name, arg...)
	b := &bytes.Buffer{}
	c.Stdout = b
	c.Stdin = stdin
	if err := c.Start(); err != nil {
		return nil, err
	}
	timedOut := false
	intTimer := time.AfterFunc(timeout, func() {
		slog.Errorf("Process taking too long. Interrupting: %s %s", name, strings.Join(arg, " "))
		c.Process.Signal(os.Interrupt)
		timedOut = true
	})
	killTimer := time.AfterFunc(timeout*2, func() {
		slog.Errorf("Process taking too long. Killing: %s %s", name, strings.Join(arg, " "))
		c.Process.Signal(os.Kill)
		timedOut = true
	})
	err := c.Wait()
	intTimer.Stop()
	killTimer.Stop()
	if timedOut {
		return nil, ErrTimeout
	}
	return b, err
}
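// Illustrative usage sketch (not part of the package): run a short-lived
// command with a 5-second interrupt timeout and read its stdout. The "uptime"
// command, the ioutil import, and this helper function are assumptions for the
// example only.
func exampleCommandUsage() {
	out, err := Command(5*time.Second, nil, "uptime")
	if err != nil {
		slog.Errorf("command failed: %v", err)
		return
	}
	data, _ := ioutil.ReadAll(out) // assumes "io/ioutil" is imported
	slog.Infof("uptime output: %s", data)
}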
func (s *Schedule) save() {
	if s.db == nil {
		return
	}
	s.Lock("Save")
	store := map[string]interface{}{
		dbMetric:        s.Search.Read.Metric,
		dbTagk:          s.Search.Read.Tagk,
		dbTagv:          s.Search.Read.Tagv,
		dbMetricTags:    s.Search.Read.MetricTags,
		dbNotifications: s.Notifications,
		dbSilence:       s.Silence,
		dbStatus:        s.status,
		dbMetadata:      s.Metadata,
		dbIncidents:     s.Incidents,
	}
	tostore := make(map[string][]byte)
	for name, data := range store {
		f := new(bytes.Buffer)
		gz := gzip.NewWriter(f)
		cw := &counterWriter{w: gz}
		enc := gob.NewEncoder(cw)
		if err := enc.Encode(data); err != nil {
			slog.Errorf("error saving %s: %v", name, err)
			s.Unlock()
			return
		}
		if err := gz.Flush(); err != nil {
			slog.Errorf("gzip flush error saving %s: %v", name, err)
		}
		if err := gz.Close(); err != nil {
			slog.Errorf("gzip close error saving %s: %v", name, err)
		}
		tostore[name] = f.Bytes()
		slog.Infof("wrote %s: %v", name, conf.ByteSize(cw.written))
		collect.Put("statefile.size", opentsdb.TagSet{"object": name}, cw.written)
	}
	s.Unlock()
	err := s.db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte(dbBucket))
		if err != nil {
			return err
		}
		for name, data := range tostore {
			if err := b.Put([]byte(name), data); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		slog.Errorf("save db update error: %v", err)
		return
	}
	fi, err := os.Stat(s.Conf.StateFile)
	if err == nil {
		collect.Put("statefile.size", opentsdb.TagSet{"object": "total"}, fi.Size())
	}
	slog.Infoln("save to db complete")
}
// structProcessor.add() takes in a metric name prefix, an arbitrary struct, and a tagset.
// The processor recurses through the struct and builds metrics. The field tags direct how
// the field should be processed, as well as the metadata for the resulting metric.
//
// The field tags used are described as follows:
//
// version: typically set to '1' or '2'.
//	This is compared against the elastic cluster version. If the version from the tag
//	does not match the version in production, the metric will not be sent for this field.
//
// exclude:
//	If this tag is set to 'true', a metric will not be sent for this field.
//
// rate: one of 'gauge', 'counter', 'rate'
//	This tag dictates the metadata.RateType we send.
//
// unit: 'bytes', 'pages', etc
//	This tag dictates the metadata.Unit we send.
//
// metric:
//	This is the metric name which will be sent. If not present, the 'json'
//	tag is sent as the metric name.
//
// Special handling:
//
// Metrics having the json tag suffix of '_in_millis' are automatically
// divided by 1000 and sent as seconds. The suffix is stripped from the name.
//
// Metrics having the json tag suffix of '_in_bytes' are automatically sent as
// gauge bytes. The suffix is stripped from the metric name.
func (s *structProcessor) add(prefix string, st interface{}, ts opentsdb.TagSet) {
	t := reflect.TypeOf(st)
	valueOf := reflect.ValueOf(st)
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		value := valueOf.Field(i).Interface()
		if field.Tag.Get("exclude") == "true" {
			continue
		}
		var (
			jsonTag    = field.Tag.Get("json")
			metricTag  = field.Tag.Get("metric")
			versionTag = field.Tag.Get("version")
			rateTag    = field.Tag.Get("rate")
			unitTag    = field.Tag.Get("unit")
		)
		metricName := jsonTag
		if metricTag != "" {
			metricName = metricTag
		}
		if metricName == "" {
			slog.Errorf("Unable to determine metric name for field %s. Skipping.", field.Name)
			continue
		}
		if versionTag == "" || strings.HasPrefix(s.elasticVersion, versionTag) {
			switch value := value.(type) {
			case int, float64: // Number types in our structs are only ints and float64s.
				// Turn all millisecond metrics into seconds
				if strings.HasSuffix(metricName, "_in_millis") {
					switch value.(type) {
					case int:
						value = float64(value.(int)) / 1000
					case float64:
						value = value.(float64) / 1000
					}
					unitTag = "seconds"
					metricName = strings.TrimSuffix(metricName, "_in_millis")
				}
				// Set rate and unit for all "_in_bytes" metrics, and strip the "_in_bytes"
				if strings.HasSuffix(metricName, "_in_bytes") {
					if rateTag == "" {
						rateTag = "gauge"
					}
					unitTag = "bytes"
					metricName = strings.TrimSuffix(metricName, "_in_bytes")
				}
				Add(s.md, prefix+"."+metricName, value, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), field.Tag.Get("desc"))
			case string:
				// The json data has a lot of strings, and we don't care about em.
			default:
				// If we hit another struct, recurse
				if reflect.ValueOf(value).Kind() == reflect.Struct {
					s.add(prefix+"."+metricName, value, ts)
				} else {
					slog.Errorf("Field %s for metric %s is non-numeric type. Cannot record as a metric.\n", field.Name, prefix+"."+metricName)
				}
			}
		}
	}
}
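// Illustrative sketch (not part of the package): a hypothetical stats struct
// showing how the field tags above are interpreted by structProcessor.add.
// All field names, metric names, and descriptions here are invented for the
// example.
type exampleIndexStats struct {
	// Sent as <prefix>.docs_count with the given rate/unit/desc metadata.
	DocsCount int `json:"docs_count" metric:"docs_count" rate:"gauge" unit:"documents" desc:"Number of documents."`
	// "_in_millis" suffix: value is divided by 1000 and sent as <prefix>.query_time in seconds.
	QueryTimeInMillis int `json:"query_time_in_millis" rate:"counter" desc:"Total query time."`
	// "_in_bytes" suffix: sent as <prefix>.store_size as gauge bytes.
	StoreSizeInBytes int `json:"store_size_in_bytes" desc:"Size of the store."`
	// Only sent when the elastic cluster version starts with "2".
	SegmentsMemory int `json:"segments_memory" version:"2" desc:"Hypothetical field only present in 2.x."`
	// Never sent.
	Ignored int `json:"ignored" exclude:"true"`
}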
func runService(name string, isDebug bool) {
	errFix := fixEventMessageFile(name) // Temp fix. Remove after a few weeks.
	if errFix != nil {
		slog.Errorf("%s fixEventMessageFile failed: %v", name, errFix)
		return
	}
	if isDebug {
		slog.SetEventLog(debug.New(name), 1)
	} else {
		elog, err := eventlog.Open(name)
		if err != nil {
			return
		}
		slog.SetEventLog(elog, 1)
		defer elog.Close()
	}
	slog.Infof("starting %s service version %v (%v)", name, version.Version, version.VersionSHA)
	run := svc.Run
	if isDebug {
		run = debug.Run
	}
	err := run(name, &s{})
	if err != nil {
		slog.Errorf("%s service failed: %v", name, err)
		return
	}
	slog.Infof("%s service stopped", name)
	os.Exit(0)
}
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	conn, err := redis.Dial("tcp", server, redis.DialDatabase(db))
	if err != nil {
		return md, err
	}
	defer conn.Close()
	if _, err := conn.Do("CLIENT", "SETNAME", "scollector"); err != nil {
		return md, err
	}
	cursor := 0
	for {
		vals, err := redis.Values(conn.Do("HSCAN", collect.RedisCountersKey, cursor))
		if err != nil {
			return md, err
		}
		if len(vals) != 2 {
			return md, fmt.Errorf("Unexpected number of values")
		}
		cursor, err = redis.Int(vals[0], nil)
		if err != nil {
			return md, err
		}
		pairs, err := redis.StringMap(vals[1], nil)
		if err != nil {
			return md, err
		}
		for mts, val := range pairs {
			parts := strings.Split(mts, ":")
			if len(parts) != 2 {
				slog.Errorf("Invalid metric tag set counter: %s", mts)
				continue
			}
			metric := parts[0]
			tags, err := opentsdb.ParseTags(parts[1])
			if err != nil {
				slog.Errorf("Invalid tags: %s", parts[1])
				continue
			}
			v, err := strconv.Atoi(val)
			if err != nil {
				slog.Errorf("Invalid counter value: %s", val)
				continue
			}
			Add(&md, metric, v, tags, metadata.Counter, metadata.Count, "")
		}
		if cursor == 0 {
			break
		}
	}
	return md, nil
}
func (s *Schedule) PutMetadata(k metadata.Metakey, v interface{}) {
	isCoreMeta := (k.Name == "desc" || k.Name == "unit" || k.Name == "rate")
	if !isCoreMeta {
		s.metaLock.Lock()
		s.Metadata[k] = &Metavalue{time.Now().UTC(), v}
		s.metaLock.Unlock()
		return
	}
	if k.Metric == "" {
		slog.Error("desc, rate, and unit require metric name")
		return
	}
	strVal, ok := v.(string)
	if !ok {
		slog.Errorf("desc, rate, and unit require value to be string. Found: %s", reflect.TypeOf(v))
		return
	}
	s.metricMetaLock.Lock()
	metricData, ok := s.metricMetadata[k.Metric]
	if !ok {
		metricData = &MetadataMetric{}
		s.metricMetadata[k.Metric] = metricData
	}
	switch k.Name {
	case "desc":
		metricData.Description = strVal
	case "unit":
		metricData.Unit = strVal
	case "rate":
		metricData.Type = strVal
	}
	s.metricMetaLock.Unlock()
}
func timeTSDBRequest(e *State, T miniprofiler.Timer, req *opentsdb.Request) (s opentsdb.ResponseSet, err error) {
	e.tsdbQueries = append(e.tsdbQueries, *req)
	if e.autods > 0 {
		for _, q := range req.Queries {
			if q.Downsample == "" {
				if err := req.AutoDownsample(e.autods); err != nil {
					return nil, err
				}
			}
		}
	}
	b, _ := json.MarshalIndent(req, "", " ")
	tries := 1
	for {
		T.StepCustomTiming("tsdb", "query", string(b), func() {
			getFn := func() (interface{}, error) {
				return e.tsdbContext.Query(req)
			}
			var val interface{}
			val, err = e.cache.Get(string(b), getFn)
			s = val.(opentsdb.ResponseSet).Copy()
		})
		if err == nil || tries == tsdbMaxTries {
			break
		}
		slog.Errorf("Error on tsdb query %d: %s", tries, err.Error())
		tries++
	}
	return
}
func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert) {
	slog.Infof("check alert %v start", a.Name)
	start := utcNow()
	for _, ak := range s.findUnknownAlerts(r.Start, a.Name) {
		r.Events[ak] = &models.Event{Status: models.StUnknown}
	}
	var warns, crits models.AlertKeys
	d, err := s.executeExpr(T, r, a, a.Depends)
	var deps expr.ResultSlice
	if err == nil {
		deps = filterDependencyResults(d)
		crits, err = s.CheckExpr(T, r, a, a.Crit, models.StCritical, nil)
		if err == nil {
			warns, err = s.CheckExpr(T, r, a, a.Warn, models.StWarning, crits)
		}
	}
	unevalCount, unknownCount := markDependenciesUnevaluated(r.Events, deps, a.Name)
	if err != nil {
		slog.Errorf("Error checking alert %s: %s", a.Name, err.Error())
		removeUnknownEvents(r.Events, a.Name)
		s.markAlertError(a.Name, err)
	} else {
		s.markAlertSuccessful(a.Name)
	}
	collect.Put("check.duration", opentsdb.TagSet{"name": a.Name}, time.Since(start).Seconds())
	slog.Infof("check alert %v done (%s): %v crits, %v warns, %v unevaluated, %v unknown", a.Name, time.Since(start), len(crits), len(warns), unevalCount, unknownCount)
}
func init() {
	registerInit(func(c *conf.Conf) {
		for _, filter := range c.ElasticIndexFilters {
			err := AddElasticIndexFilter(filter)
			if err != nil {
				slog.Errorf("Error processing ElasticIndexFilter: %s", err)
			}
		}
		collectors = append(collectors, &IntervalCollector{
			F: func() (opentsdb.MultiDataPoint, error) {
				return c_elasticsearch(false)
			},
			name:   "elasticsearch",
			Enable: enableURL("http://localhost:9200/"),
		})
		collectors = append(collectors, &IntervalCollector{
			F: func() (opentsdb.MultiDataPoint, error) {
				return c_elasticsearch(true)
			},
			name:     "elasticsearch-indices",
			Interval: time.Minute * 15,
			Enable:   enableURL("http://localhost:9200/"),
		})
	})
}
func (s *Schedule) findUnknownAlerts(now time.Time, alert string) []models.AlertKey {
	keys := []models.AlertKey{}
	if utcNow().Sub(bosunStartupTime) < s.SystemConf.GetCheckFrequency() {
		return keys
	}
	if !s.AlertSuccessful(alert) {
		return keys
	}
	a := s.RuleConf.GetAlert(alert)
	t := a.Unknown
	if t == 0 {
		runEvery := s.SystemConf.GetDefaultRunEvery()
		if a.RunEvery != 0 {
			runEvery = a.RunEvery
		}
		t = s.SystemConf.GetCheckFrequency() * 2 * time.Duration(runEvery)
	}
	maxTouched := now.UTC().Unix() - int64(t.Seconds())
	untouched, err := s.DataAccess.State().GetUntouchedSince(alert, maxTouched)
	if err != nil {
		slog.Errorf("Error finding unknown alerts for alert %s: %s.", alert, err)
		return keys
	}
	for _, ak := range untouched {
		if a.Squelch.Squelched(ak.Group()) {
			continue
		}
		keys = append(keys, ak)
	}
	return keys
}
func c_fastly(c fastlyClient) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	to := time.Now().UTC().Truncate(time.Minute)
	from := to.Add(-15 * time.Minute) // "Minutely data will be delayed by roughly 10 to 15 minutes from the current time" -- Fastly Docs

	// Aggregate
	statsCollection, err := c.GetAggregateStats(from, to)
	if err != nil {
		return md, err
	}
	for _, stats := range statsCollection {
		fastlyReflectAdd(&md, "fastly", "", stats, stats.StartTime, nil)
	}

	// By Service
	services, err := c.GetServices()
	if err != nil {
		return md, err
	}
	for _, service := range services {
		statsCollection, err := c.GetServiceStats(from, to, service.Id)
		if err != nil {
			slog.Errorf("couldn't get stats for service %v with id %v: %v", service.Name, service.Id, err)
			continue
		}
		for _, stats := range statsCollection {
			fastlyReflectAdd(&md, "fastly", "_by_service", stats, stats.StartTime, service.TagSet())
		}
	}

	// By Region
	regions, err := c.GetRegions()
	if err != nil {
		return md, err
	}
	for _, region := range regions {
		statsCollection, err := c.GetRegionStats(from, to, region)
		if err != nil {
			slog.Errorf("couldn't get stats for region %v: %v", region, err)
			continue
		}
		for _, stats := range statsCollection {
			fastlyReflectAdd(&md, "fastly", "_by_region", stats, stats.StartTime, region.TagSet())
		}
	}
	return md, nil
}
func incrementRedisCounter(data []byte, addr string, conn redis.Conn) {
	if len(data) < 5 {
		slog.Errorf("Insufficient data for increment from %s.", addr)
		return
	}
	r := bytes.NewReader(data)
	var i int32
	err := binary.Read(r, binary.BigEndian, &i)
	if err != nil {
		slog.Error(err)
		return
	}
	mts := string(data[4:])
	if _, err = conn.Do("HINCRBY", RedisCountersKey, mts, i); err != nil {
		slog.Errorf("Error incrementing counter %s by %d. From %s. %s", mts, i, addr, err)
	}
}
func (s *Schedule) GetUnknownAndUnevaluatedAlertKeys(alert string) (unknown, uneval []models.AlertKey) {
	unknown, uneval, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(alert)
	if err != nil {
		slog.Errorf("Error getting unknown/unevaluated alert keys: %s", err)
		return nil, nil
	}
	return unknown, uneval
}
// snmp_subtree takes an oid and returns all data exactly one level below it. It
// produces an error if there is more than one level below.
func snmp_subtree(host, community, oid string) (map[string]interface{}, error) {
	rows, err := snmp.Walk(host, community, oid)
	if err != nil {
		return nil, err
	}
	m := make(map[string]interface{})
	for rows.Next() {
		key := ""
		var a interface{}
		switch oid {
		case ifHCInBroadcastPkts:
			a = new(big.Int)
			id, err := rows.Scan(&a)
			if err != nil {
				slog.Errorf("Error scanning oid %v on host %v: %v", oid, host, err)
				continue
			}
			switch t := id.(type) {
			case int:
				key = fmt.Sprint(t)
			default:
				return nil, fmt.Errorf("snmp subtree: only one level allowed")
			}
		default:
			id, err := rows.Scan(&a)
			if err != nil {
				slog.Errorf("Error scanning oid %v on host %v: %v", oid, host, err)
				continue
			}
			switch t := id.(type) {
			case int:
				key = fmt.Sprint(t)
			case []int:
				key = snmpOidArrayToString(t)
			default:
				return nil, fmt.Errorf("Unknown key type: %s", reflect.TypeOf(id).String())
			}
		}
		m[key] = a
	}
	if err := rows.Err(); err != nil && err != io.EOF {
		return nil, err
	}
	return m, nil
}
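// Illustrative usage sketch (not part of the package): walk an SNMP subtree
// and log each returned index/value pair. The host, community string, OID,
// and this helper function are hypothetical placeholders for the example.
func exampleSnmpSubtreeUsage() {
	const ifDescrOID = "1.3.6.1.2.1.2.2.1.2" // example OID; adjust for the device being polled
	values, err := snmp_subtree("router.example.com", "public", ifDescrOID)
	if err != nil {
		slog.Errorf("snmp walk failed: %v", err)
		return
	}
	for id, value := range values {
		slog.Infof("index %s = %v", id, value)
	}
}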
func fastlyReflectAdd(md *opentsdb.MultiDataPoint, prefix, suffix string, st interface{}, timeStamp int64, ts opentsdb.TagSet) {
	t := reflect.TypeOf(st)
	valueOf := reflect.ValueOf(st)
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		value := valueOf.Field(i).Interface()
		var (
			jsonTag   = field.Tag.Get("json")
			metricTag = field.Tag.Get("metric")
			rateTag   = field.Tag.Get("rate")
			unitTag   = field.Tag.Get("unit")
			divTag    = field.Tag.Get("div")
			descTag   = field.Tag.Get("desc")
			exclude   = field.Tag.Get("exclude") != ""
		)
		if exclude || descTag == "" {
			continue
		}
		metricName := jsonTag
		if metricTag != "" {
			metricName = metricTag
		}
		if metricName == "" {
			slog.Errorf("Unable to determine metric name for field %s. Skipping.", field.Name)
			continue
		}
		shouldDiv := divTag != ""
		if shouldDiv {
			descTag = fmt.Sprintf("%v %v", descTag, fastlyDivDesc)
		}
		fullMetric := fmt.Sprintf("%v.%v%v", prefix, metricName, suffix)
		switch value := value.(type) {
		case int64, float64:
			var v float64
			if f, found := value.(float64); found {
				v = f
			} else {
				v = float64(value.(int64))
			}
			if shouldDiv {
				v /= 60.0
			}
			AddTS(md, fullMetric, timeStamp, v, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), descTag)
		case string:
			// Floats in strings, I know not why, precision perhaps?
			// err ignored since we expect non number strings in the struct
			if f, err := strconv.ParseFloat(value, 64); err == nil {
				if shouldDiv {
					f /= 60.0
				}
				AddTS(md, fullMetric, timeStamp, f, ts, metadata.RateType(rateTag), metadata.Unit(unitTag), descTag)
			}
		default:
			// Pass since there is no need to recurse
		}
	}
}
func c_cisco_nxos(host, community string, cpuIntegrator tsIntegrator) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	ts := opentsdb.TagSet{"host": host}
	// CPU
	if err := ciscoCPU(host, community, ts, cpuIntegrator, &md); err != nil {
		return md, err
	}
	// Memory
	memRaw, err := snmp_subtree(host, community, ciscoBaseOID+cpmCPUTotalEntry)
	if err != nil {
		return md, fmt.Errorf("failed to get cpmCPUTotalEntry (for memory) for host %v: %v", host, err)
	}
	var usedMem, freeMem, totalMem int64
	var usedOk, freeOk bool
	for id, value := range memRaw {
		var v int64
		switch id {
		case "12.1":
			if v, usedOk = value.(int64); usedOk {
				usedMem = v * 2 << 9 // KiB to Bytes
				totalMem += usedMem
				Add(&md, osMemUsed, usedMem, ts, metadata.Gauge, metadata.Bytes, osMemUsedDesc)
			} else {
				slog.Errorf("failed to convert used memory %v to int64 for host %v", value, host)
			}
		case "13.1":
			if v, freeOk = value.(int64); freeOk {
				freeMem = v * 2 << 9
				totalMem += freeMem
				Add(&md, osMemFree, freeMem, ts, metadata.Gauge, metadata.Bytes, osMemFreeDesc)
			} else {
				slog.Errorf("failed to convert free memory %v to int64 for host %v", value, host)
			}
		}
	}
	if usedOk && freeOk {
		Add(&md, osMemTotal, totalMem, ts, metadata.Gauge, metadata.Bytes, osMemTotalDesc)
		Add(&md, osMemPctFree, int64(float64(freeMem)/float64(totalMem)*100), ts, metadata.Gauge, metadata.Pct, osMemPctFreeDesc)
	} else {
		slog.Errorf("failed to get both free and used memory for host %v", host)
	}
	return md, nil
}
func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert) (cancelled bool) {
	slog.Infof("check alert %v start", a.Name)
	start := utcNow()
	for _, ak := range s.findUnknownAlerts(r.Start, a.Name) {
		r.Events[ak] = &models.Event{Status: models.StUnknown}
	}
	var warns, crits models.AlertKeys
	type res struct {
		results *expr.Results
		error   error
	}
	// buffered channel so the goroutine that runs executeExpr won't leak if the check is
	// cancelled by the closing of the schedule
	rc := make(chan res, 1)
	var d *expr.Results
	var err error
	go func() {
		d, err := s.executeExpr(T, r, a, a.Depends)
		rc <- res{d, err} // this will hang forever if the channel isn't buffered, since nothing will ever receive from rc
	}()
	select {
	case res := <-rc:
		d = res.results
		err = res.error
	// If the schedule closes before the expression has finished executing, we abandon the
	// execution of the expression
	case <-s.runnerContext.Done():
		return true
	}
	var deps expr.ResultSlice
	if err == nil {
		deps = filterDependencyResults(d)
		crits, err, cancelled = s.CheckExpr(T, r, a, a.Crit, models.StCritical, nil)
		if err == nil && !cancelled {
			warns, err, cancelled = s.CheckExpr(T, r, a, a.Warn, models.StWarning, crits)
		}
	}
	if cancelled {
		return true
	}
	unevalCount, unknownCount := markDependenciesUnevaluated(r.Events, deps, a.Name)
	if err != nil {
		slog.Errorf("Error checking alert %s: %s", a.Name, err.Error())
		removeUnknownEvents(r.Events, a.Name)
		s.markAlertError(a.Name, err)
	} else {
		s.markAlertSuccessful(a.Name)
	}
	collect.Put("check.duration", opentsdb.TagSet{"name": a.Name}, time.Since(start).Seconds())
	slog.Infof("check alert %v done (%s): %v crits, %v warns, %v unevaluated, %v unknown", a.Name, time.Since(start), len(crits), len(warns), unevalCount, unknownCount)
	return false
}
// AddTS is the same as Add but lets you specify the timestamp
func AddTS(md *opentsdb.MultiDataPoint, name string, ts int64, value interface{}, t opentsdb.TagSet, rate metadata.RateType, unit metadata.Unit, desc string) {
	// Check if we really want that metric
	if skipMetric(name) {
		return
	}
	tags := t.Copy()
	if host, present := tags["host"]; !present {
		tags["host"] = util.Hostname
	} else if host == "" {
		delete(tags, "host")
	}
	// if tags are not cleanable, log a message and skip it
	if err := tags.Clean(); err != nil {
		line := ""
		// attempt to log where Add was called from
		if _, filename, l, ok := runtime.Caller(1); ok {
			if filepath.Base(filename) == "collectors.go" {
				_, filename, l, ok = runtime.Caller(2)
			}
			if ok {
				line = fmt.Sprintf("%s:%d", filepath.Base(filename), l)
			}
		}
		slog.Errorf("Invalid tagset discovered: %s. Skipping datapoint. Added from: %s", tags.String(), line)
		return
	}
	if rate != metadata.Unknown {
		metadata.AddMeta(name, nil, "rate", rate, false)
	}
	if unit != metadata.None {
		metadata.AddMeta(name, nil, "unit", unit, false)
	}
	if desc != "" {
		metadata.AddMeta(name, tags, "desc", desc, false)
	}
	tags = AddTags.Copy().Merge(tags)
	if b, ok := value.(bool); ok {
		if b {
			value = 1
		} else {
			value = 0
		}
	}
	d := opentsdb.DataPoint{
		Metric:    name,
		Timestamp: ts,
		Value:     value,
		Tags:      tags,
	}
	*md = append(*md, &d)
}
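// Illustrative usage sketch (not part of the package): record a datapoint
// with an explicit timestamp via AddTS. The metric name, tag, value, and
// description are invented for the example.
func exampleAddTSUsage(md *opentsdb.MultiDataPoint) {
	now := time.Now().Unix()
	AddTS(md, "example.disk.used", now, 12345, opentsdb.TagSet{"disk": "sda"},
		metadata.Gauge, metadata.Bytes, "Hypothetical description of the example metric.")
}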
func isPseudoFS(name string) (res bool) { err := readLine("/proc/filesystems", func(s string) error { ss := strings.Split(s, "\t") if len(ss) == 2 && ss[1] == name && ss[0] == "nodev" { res = true } return nil }) if err != nil { slog.Errorf("can not read '/proc/filesystems': %v", err) } return }
func isPseudoFS(name string) (res bool) { err := readLine("/proc/filesystems", func(s string) error { if strings.Contains(s, name) && strings.Contains(s, "nodev") { res = true return nil } return nil }) if err != nil { slog.Errorf("can not read '/proc/filesystems': %v", err) } return }
func c_redis_counters(server string, db int) (opentsdb.MultiDataPoint, error) { var md opentsdb.MultiDataPoint conn, err := redis.Dial("tcp", server, redis.DialDatabase(db)) if err != nil { return md, slog.Wrap(err) } defer conn.Close() //do a dance to detect proper hscan command for ledis or redis hscanCmd := "XHSCAN" info, err := redis.String(conn.Do("info", "server")) if err != nil { return md, slog.Wrap(err) } if strings.Contains(info, "redis_version") { hscanCmd = "HSCAN" } cursor := "0" for { vals, err := redis.Values(conn.Do(hscanCmd, collect.RedisCountersKey, cursor)) if err != nil { return md, slog.Wrap(err) } if len(vals) != 2 { return md, fmt.Errorf("Unexpected number of values") } cursor, err = redis.String(vals[0], nil) if err != nil { return md, slog.Wrap(err) } pairs, err := redis.StringMap(vals[1], nil) if err != nil { return md, slog.Wrap(err) } for key, val := range pairs { ak := models.AlertKey(key) v, err := strconv.Atoi(val) if err != nil { slog.Errorf("Invalid counter value: %s", val) continue } Add(&md, ak.Name(), v, ak.Group(), metadata.Counter, metadata.Count, "") } if cursor == "" || cursor == "0" { break } } return md, nil }
func c_snmp_ips(community, host string) (opentsdb.MultiDataPoint, error) {
	ifIPAdEntAddrRaw, err := snmp_subtree(host, community, ifIPAdEntAddr)
	if err != nil {
		return nil, err
	}
	ipAdEnts := make(map[string]*ipAdEntAddr)
	for id, value := range ifIPAdEntAddrRaw {
		// Split entry type id from ip address
		sp := strings.SplitN(id, ".", 2)
		if len(sp) != 2 {
			slog.Errorln("unexpected length of snmp response")
			continue
		}
		typeId := sp[0]
		address := sp[1]
		if _, ok := ipAdEnts[address]; !ok {
			ipAdEnts[address] = &ipAdEntAddr{}
		}
		switch typeId {
		case "1":
			if v, ok := value.([]byte); ok {
				ipAdEnts[address].IP = v
			}
		case "2":
			if v, ok := value.(int64); ok {
				ipAdEnts[address].InterfaceId = v
			}
		case "3":
			if v, ok := value.([]byte); ok {
				ipAdEnts[address].Mask = v
			}
		}
	}
	ipsByInt := make(map[int64][]net.IPNet)
	for _, ipNet := range ipAdEnts {
		ipsByInt[ipNet.InterfaceId] = append(ipsByInt[ipNet.InterfaceId], ipNet.IPNet)
	}
	for intId, ipNets := range ipsByInt {
		var ips []string
		for _, ipNet := range ipNets {
			ips = append(ips, ipNet.String())
		}
		sort.Strings(ips)
		j, err := json.Marshal(ips)
		if err != nil {
			slog.Errorf("error marshaling ips for host %v: %v", host, err)
			continue
		}
		metadata.AddMeta("", opentsdb.TagSet{"host": host, "iface": fmt.Sprintf("%v", intId)}, "addresses", string(j), false)
	}
	return nil, nil
}
/*
Listen on the specified udp port for events. This provides long term
aggregation for sparse events.

wire format: opcode(1 byte) | data

Opcodes:
	1: increment - increments a redis counter for the specified metric/tag set
	   data: count(4 bytes signed int) | metric:tag1=foo,tag2=bar
*/
func ListenUdp(port int, redisHost string, redisDb int) error {
	addr := net.UDPAddr{
		Port: port,
		IP:   net.ParseIP("127.0.0.1"),
	}
	conn, err := net.ListenUDP("udp", &addr)
	if err != nil {
		return err
	}
	pool := newRedisPool(redisHost, redisDb)
	for {
		buf := make([]byte, 1025)
		n, addr, err := conn.ReadFromUDP(buf)
		if err != nil {
			slog.Error(err)
			continue
		}
		if n == len(buf) {
			// if we get a full buffer, assume some was truncated.
			slog.Errorf("Too large a udp packet received from: %s. Skipping.", addr.String())
			continue
		}
		Add("udp.packets", opentsdb.TagSet{}, 1)
		go func(data []byte, addr string) {
			c := pool.Get()
			defer c.Close()
			if len(data) == 0 {
				slog.Errorf("Empty packet received from %s.", addr)
				return
			}
			switch data[0] {
			case 1:
				incrementRedisCounter(data[1:], addr, c)
			default:
				slog.Errorf("Unknown opcode %d from %s.", data[0], addr)
			}
		}(buf[:n], addr.String())
	}
}
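// Illustrative sketch (not part of the package): build and send an increment
// packet matching the wire format documented above -- one opcode byte (1), a
// 4-byte big-endian signed count (as read by incrementRedisCounter), then the
// metric:tagset string. The port, metric, and tags are hypothetical, and this
// assumes "net", "bytes", and "encoding/binary" are imported.
func exampleSendIncrement() error {
	conn, err := net.Dial("udp", "127.0.0.1:9999")
	if err != nil {
		return err
	}
	defer conn.Close()
	buf := &bytes.Buffer{}
	buf.WriteByte(1)                                         // opcode 1: increment
	binary.Write(buf, binary.BigEndian, int32(5))            // count (writes to a bytes.Buffer cannot fail)
	buf.WriteString("example.hits:host=web1,route=checkout") // metric:tag1=foo,tag2=bar
	_, err = conn.Write(buf.Bytes())
	return err
}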
func DatabaseCollect(c conf.Database, collectorStatsChan chan<- *ContinuousCollectorStats) {
	queries := DatabaseGetQueries(c)
	if len(queries) < 1 {
		slog.Errorf("%v: %v: %v", "Database", c.Type, "no queries")
		// nothing to schedule; returning also avoids indexing into an empty slice below
		return
	}
	db, err := DatabaseConnect(c)
	if err != nil {
		return
	}
	defer db.Close()
	var waitGroup sync.WaitGroup
	queryChan := make(chan *DatabaseQuery, 1)
	waitGroup.Add(c.MaxOpenConns)
	for i := 0; i < c.MaxOpenConns; i++ {
		go func() {
			DatabaseRunQueryWorker(c, db, queryChan, collectorStatsChan)
			waitGroup.Done()
		}()
	}
Loop:
	for {
		nextTime := queries[0].next
		// maybe change to a priority queue instead of checking each slice?
		for i := range queries {
			if time.Now().After(queries[i].next) {
				queryChan <- &queries[i]
				queries[i].next = time.Now().Add(queries[i].interval)
				if queries[i].next.Before(nextTime) {
					nextTime = queries[i].next
				}
			}
		}
		next := time.After(nextTime.Sub(time.Now()))
		select {
		case <-next:
		case <-ContinuousCollectorVars.quit:
			break Loop
		}
	}
	close(queryChan)
	waitGroup.Wait()
}
func c_cadvisor(c *client.Client, config *conf.Cadvisor) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	containers, err := c.AllDockerContainers(&v1.ContainerInfoRequest{NumStats: 1})
	if err != nil {
		slog.Errorf("Error fetching containers from cadvisor: %v", err)
		return md, err
	}
	for _, container := range containers {
		statsForContainer(&md, &container, config)
	}
	return md, nil
}
func (c *IntervalCollector) Run(dpchan chan<- *opentsdb.DataPoint, quit <-chan struct{}) {
	if c.Enable != nil {
		go func() {
			for {
				next := time.After(time.Minute * 5)
				c.Lock()
				c.enabled = c.Enable()
				c.Unlock()
				<-next
			}
		}()
	}
	for {
		interval := c.Interval
		if interval == 0 {
			interval = DefaultFreq
		}
		next := time.After(interval)
		if c.Enabled() {
			timeStart := time.Now()
			md, err := c.F()
			timeFinish := time.Since(timeStart)
			result := 0
			if err != nil {
				slog.Errorf("%v: %v", c.Name(), err)
				result = 1
			}
			if !collect.DisableDefaultCollectors {
				tags := opentsdb.TagSet{"collector": c.Name(), "os": runtime.GOOS}
				Add(&md, "scollector.collector.duration", timeFinish.Seconds(), tags, metadata.Gauge, metadata.Second, "Duration in seconds for each collector run.")
				Add(&md, "scollector.collector.error", result, tags, metadata.Gauge, metadata.Ok, "Status of collector run. 1=Error, 0=Success.")
			}
			for _, dp := range md {
				c.ApplyTagOverrides(dp.Tags)
				dpchan <- dp
			}
		}
		select {
		case <-next:
		case <-quit:
			return
		}
	}
}
// RunHistory processes an event history and triggers notifications if needed.
func (s *Schedule) RunHistory(r *RunHistory) {
	checkNotify := false
	silenced := s.Silenced()
	for ak, event := range r.Events {
		shouldNotify, err := s.runHistory(r, ak, event, silenced)
		checkNotify = checkNotify || shouldNotify
		if err != nil {
			slog.Errorf("Error in runHistory for %s. %s.", ak, err)
		}
	}
	if checkNotify && s.nc != nil {
		select {
		case s.nc <- true:
		default:
		}
	}
}
func checkMdadmLinux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	volumes, err := filepath.Glob("/dev/md*")
	if err != nil {
		return md, err
	}
	for _, volume := range filterVolumes(volumes) {
		detail, err := examineMdadmVolume(volume)
		if err != nil {
			slog.Errorf("mdadm: can't parse %s data, %s", volume, err)
			continue
		}
		addMdadmMetric(&md, volume, detail)
	}
	return md, nil
}
func (m *s) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
	const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
	changes <- svc.Status{State: svc.StartPending}
	changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}
loop:
	for c := range r {
		switch c.Cmd {
		case svc.Interrogate:
			changes <- c.CurrentStatus
		case svc.Stop, svc.Shutdown:
			break loop
		default:
			slog.Errorf("unexpected control request #%d", c)
		}
	}
	changes <- svc.Status{State: svc.StopPending}
	return
}