// instrument wraps around a processing function, and makes sure we track the number of metrics and duration of the call, // which it flushes as metrics2.0 metrics to the outgoing buffer. func (s *StatsDaemon) instrument(st statsdType, buf []byte, now int64, name string) ([]byte, int64) { time_start := s.Clock.Now() buf, num := st.Process(buf, now, s.flushInterval) time_end := s.Clock.Now() duration_ms := float64(time_end.Sub(time_start).Nanoseconds()) / float64(1000000) buf = common.WriteFloat64(buf, []byte(fmt.Sprintf("%sstatsd_type_is_%s.target_type_is_gauge.type_is_calculation.unit_is_ms", s.prefix, name)), duration_ms, now) buf = common.WriteFloat64(buf, []byte(fmt.Sprintf("%sdirection_is_out.statsd_type_is_%s.target_type_is_rate.unit_is_Metricps", s.prefix, name)), float64(num)/float64(s.flushInterval), now) return buf, num }
// Process puts gauges in the outbound buffer func (g *Gauges) Process(buf []byte, now int64, interval int) ([]byte, int64) { var num int64 for key, val := range g.Values { buf = common.WriteFloat64(buf, []byte(m20.Gauge(key, g.prefix)), val, now) num++ } return buf, num }
// graphiteWriter is the background workers that connects to graphite and submits all pending data to it
// TODO: conn.Write() returns no error for a while when the remote endpoint is down, the reconnect happens with a delay
//
// It consumes payloads from s.graphiteQueue. For each payload it (1) waits until a
// connection exists, (2) writes the payload, retrying with a fresh connection on error,
// then (3) writes a small follow-up metric recording how long the successful send took.
// A separate goroutine redials every 2 seconds whenever conn is nil. The mutex guards
// conn (and err), which are shared between that dialer goroutine and this loop.
func (s *StatsDaemon) graphiteWriter() {
	lock := &sync.Mutex{}
	connectTicker := s.Clock.Tick(2 * time.Second)
	var conn net.Conn
	var err error
	// dialer goroutine: re-establishes the connection whenever it is nil.
	// NOTE(review): this goroutine ranges over the ticker forever and has no stop
	// signal, so it keeps running after graphiteQueue closes — confirm intended.
	go func() {
		for range connectTicker {
			lock.Lock()
			if conn == nil {
				conn, err = net.Dial("tcp", s.graphite_addr)
				if err == nil {
					log.Printf("now connected to %s", s.graphite_addr)
				} else {
					log.Printf("WARN: dialing %s failed: %s. will retry", s.graphite_addr, err.Error())
				}
			}
			lock.Unlock()
		}
	}()
	for buf := range s.graphiteQueue {
		// block until the dialer goroutine has produced a connection
		lock.Lock()
		haveConn := (conn != nil)
		lock.Unlock()
		for !haveConn {
			s.Clock.Sleep(time.Second)
			lock.Lock()
			haveConn = (conn != nil)
			lock.Unlock()
		}
		if s.debug {
			for _, line := range bytes.Split(buf, []byte("\n")) {
				if len(line) == 0 {
					continue
				}
				log.Printf("DEBUG: WRITING %s", line)
			}
		}
		// write the payload; on failure drop the connection (the dialer will
		// recreate it) and retry until the write succeeds.
		ok := false
		var duration float64
		var pre time.Time
		for !ok {
			pre = s.Clock.Now()
			lock.Lock()
			_, err = conn.Write(buf)
			if err == nil {
				ok = true
				// elapsed write time in milliseconds, reported below as the send-time gauge
				duration = float64(s.Clock.Now().Sub(pre).Nanoseconds()) / float64(1000000)
				if s.debug {
					log.Println("DEBUG: wrote metrics payload to graphite!")
				}
			} else {
				log.Printf("failed to write to graphite: %s (took %s). will retry...", err, s.Clock.Now().Sub(pre))
				conn.Close()
				conn = nil
				haveConn = false
			}
			lock.Unlock()
			// after a failed write, wait for the dialer to reconnect before retrying
			for !ok && !haveConn {
				s.Clock.Sleep(2 * time.Second)
				lock.Lock()
				haveConn = (conn != nil)
				lock.Unlock()
			}
		}
		// reuse the buffer to send a single gauge describing how long the send took,
		// timestamped at the moment the successful write started.
		buf = buf[:0]
		buf = common.WriteFloat64(buf, []byte(fmt.Sprintf("%starget_type_is_gauge.type_is_send.unit_is_ms", s.prefix)), duration, pre.Unix())
		ok = false
		for !ok {
			lock.Lock()
			_, err = conn.Write(buf)
			if err == nil {
				ok = true
				if s.debug {
					log.Println("DEBUG: wrote sendtime to graphite!")
				}
			} else {
				log.Printf("failed to write target_type_is_gauge.type_is_send.unit_is_ms: %s. will retry...", err)
				conn.Close()
				conn = nil
				haveConn = false
			}
			lock.Unlock()
			for !ok && !haveConn {
				s.Clock.Sleep(2 * time.Second)
				lock.Lock()
				haveConn = (conn != nil)
				lock.Unlock()
			}
		}
	}
	// queue closed: shut down the connection if one is open
	lock.Lock()
	if conn != nil {
		conn.Close()
	}
	lock.Unlock()
}
// Process computes the outbound metrics for timers and puts them in the buffer
// these are the metrics that get exposed:
// count       estimate of original amount of metrics sent, by dividing received by samplerate
// count_ps    same but per second
// lower
// mean        // arithmetic mean
// mean_<pct>  // arithmetic mean of values below <pct> percentile
// median
// std         standard deviation
// sum
// sum_90
// upper
// upper_90 / lower_90
//
// Returns the grown buffer and the number of timer keys that had data.
func (timers *Timers) Process(buf []byte, now int64, interval int) ([]byte, int64) {
	var num int64
	for u, t := range timers.Values {
		if len(t.Points) > 0 {
			seen := len(t.Points)
			count := t.Amount_submitted
			count_ps := float64(count) / float64(interval)
			num++
			// sorted order is required for min/max, median and percentile indexing below
			sort.Sort(t.Points)
			min := t.Points[0]
			max := t.Points[seen-1]
			sum := float64(0)
			for _, value := range t.Points {
				sum += value
			}
			mean := float64(sum) / float64(seen)
			// population standard deviation (divides by N, not N-1)
			sumOfDiffs := float64(0)
			for _, value := range t.Points {
				sumOfDiffs += math.Pow((float64(value) - mean), 2)
			}
			stddev := math.Sqrt(sumOfDiffs / float64(seen))
			// median: middle point, or average of the two middle points when N is even
			mid := seen / 2
			var median float64
			if seen%2 == 1 {
				median = t.Points[mid]
			} else {
				median = (t.Points[mid-1] + t.Points[mid]) / 2
			}
			// prefix sums so a percentile's sum can be read in O(1) below
			var cumulativeValues Float64Slice
			cumulativeValues = make(Float64Slice, seen, seen)
			cumulativeValues[0] = t.Points[0]
			for i := 1; i < seen; i++ {
				cumulativeValues[i] = t.Points[i] + cumulativeValues[i-1]
			}
			// defaults used when seen == 1: the percentile stats degenerate to the full-set stats
			maxAtThreshold := max
			sum_pct := sum
			mean_pct := mean
			for _, pct := range timers.pctls {
				if seen > 1 {
					// a negative percentile means "lower" semantics: take the tail
					// above (100 + pct)% instead of the head below pct%
					var abs float64
					if pct.float >= 0 {
						abs = pct.float
					} else {
						abs = 100 + pct.float
					}
					// poor man's math.Round(x):
					// math.Floor(x + 0.5)
					indexOfPerc := int(math.Floor(((abs / 100.0) * float64(seen)) + 0.5))
					if pct.float >= 0 {
						// upper percentile: stats over the lowest indexOfPerc points
						sum_pct = cumulativeValues[indexOfPerc-1]
						maxAtThreshold = t.Points[indexOfPerc-1]
					} else {
						// lower percentile: stats over the highest points, via prefix-sum difference
						maxAtThreshold = t.Points[indexOfPerc]
						sum_pct = cumulativeValues[seen-1] - cumulativeValues[seen-indexOfPerc-1]
					}
					mean_pct = float64(sum_pct) / float64(indexOfPerc)
				}
				// positive percentiles report an upper_<pct> bound, negative ones a lower_<pct> bound
				var pctstr string
				var fn func(metric_in, prefix, percentile, timespec string) string
				if pct.float >= 0 {
					pctstr = pct.str
					fn = m20.Max
				} else {
					pctstr = pct.str[1:]
					fn = m20.Min
				}
				buf = common.WriteFloat64(buf, []byte(fn(u, timers.prefix, pctstr, "")), maxAtThreshold, now)
				buf = common.WriteFloat64(buf, []byte(m20.Mean(u, timers.prefix, pctstr, "")), mean_pct, now)
				buf = common.WriteFloat64(buf, []byte(m20.Sum(u, timers.prefix, pctstr, "")), sum_pct, now)
			}
			// whole-distribution stats for this timer key
			buf = common.WriteFloat64(buf, []byte(m20.Mean(u, timers.prefix, "", "")), mean, now)
			buf = common.WriteFloat64(buf, []byte(m20.Median(u, timers.prefix, "", "")), median, now)
			buf = common.WriteFloat64(buf, []byte(m20.Std(u, timers.prefix, "", "")), stddev, now)
			buf = common.WriteFloat64(buf, []byte(m20.Sum(u, timers.prefix, "", "")), sum, now)
			buf = common.WriteFloat64(buf, []byte(m20.Max(u, timers.prefix, "", "")), max, now)
			buf = common.WriteFloat64(buf, []byte(m20.Min(u, timers.prefix, "", "")), min, now)
			buf = common.WriteInt64(buf, []byte(m20.CountPckt(u, timers.prefix)), count, now)
			buf = common.WriteFloat64(buf, []byte(m20.RatePckt(u, timers.prefix)), count_ps, now)
		}
	}
	return buf, num
}