// Sent records info about one completed send run and trims the in-memory
// history (s.sent) so that, once full, it spans no more than the configured
// duration s.d. It also keeps s.begin/s.end in sync with the oldest and
// newest entries.
func (s *SenderStats) Sent(info SentInfo) {
	if DebugStats {
		fmt.Printf("\n%+v\n", info)
		fmt.Printf("range: %s to %s (%s)\n", pct.TimeString(s.begin), pct.TimeString(s.end), s.end.Sub(s.begin))
		// Print the range again on return so the effect of this call is visible.
		defer func() {
			fmt.Printf("range: %s to %s (%s)\n", pct.TimeString(s.begin), pct.TimeString(s.end), s.end.Sub(s.begin))
		}()
	}

	// Save this info and make it the latest.
	// New entries are pushed to the front, so front = newest, back = oldest.
	s.sent.PushFront(info)
	s.end = info.End.UTC()

	if s.full {
		// History already spans the full duration: drop oldest entries that
		// are no longer needed to keep the window covered. Collect first,
		// remove after, so removal doesn't disturb the scan.
		old := []*list.Element{}
		for e := s.sent.Back(); e != nil && e.Prev() != nil; e = e.Prev() {
			// We can remove this info (e) if the next info (e.Prev) to s.end
			// maintains the full duration.
			info := e.Prev().Value.(SentInfo)
			d := s.end.Sub(info.Begin.UTC())
			if DebugStats {
				fmt.Printf("have %s at %s\n", d, info.Begin.UTC())
			}
			if d < s.d {
				// Can't remove this info because next info to s.end makes
				// duration too short.
				break
			}
			// Remove this info because next info to s.end is sufficiently
			// long duration.
			old = append(old, e)
		}
		for _, e := range old {
			if DebugStats {
				fmt.Printf("pop %+v\n", e.Value.(SentInfo))
			}
			s.sent.Remove(e)
		}
	} else if info.End.UTC().Sub(s.begin) >= s.d {
		// History now spans at least s.d: start trimming on future calls.
		if DebugStats {
			fmt.Println("full")
		}
		s.full = true
	}

	// Keep oldest up to date so we can determine when duration is full.
	s.begin = s.sent.Back().Value.(SentInfo).Begin.UTC()
}
// run is the monitor's main loop: on each tick it collects MySQL metrics
// (SHOW GLOBAL STATUS, plus optional InnoDB and user-stats tables) and
// spools them to the aggregator via m.collectionChan. It also reacts to
// connection state changes on m.connectedChan/m.restartChan and returns
// when m.sync.StopChan fires. Runs in its own goroutine.
func (m *Monitor) run() {
	m.logger.Debug("run:call")
	defer func() {
		// Recover so a panic during collection doesn't take down the agent;
		// then release the connection and signal that the monitor stopped.
		if err := recover(); err != nil {
			m.logger.Error("MySQL monitor crashed: ", err)
		}
		m.conn.Close()
		m.status.Update(m.name, "Stopped")
		m.sync.Done()
		m.logger.Debug("run:return")
	}()

	// Connect asynchronously; the result arrives on m.connectedChan.
	connected := false
	go m.connect(nil)

	m.status.Update(m.name, "Ready")
	var lastTs int64     // Ts of the last collection successfully spooled
	var lastError string // last collection error, surfaced in the status line
	for {
		// Refresh the Idle status line each pass so it reflects the most
		// recent collection time and error.
		t := time.Unix(lastTs, 0)
		if lastError == "" {
			m.status.Update(m.name, fmt.Sprintf("Idle (last collected at %s)", pct.TimeString(t)))
		} else {
			m.status.Update(m.name, fmt.Sprintf("Idle (last collected at %s, error: %s)", pct.TimeString(t), lastError))
		}
		select {
		case now := <-m.tickChan:
			m.logger.Debug("run:collect:start")
			if !connected {
				// Can't collect without a MySQL connection; wait for the
				// connect goroutine to report success on m.connectedChan.
				m.logger.Debug("run:collect:disconnected")
				lastError = "Not connected to MySQL"
				continue
			}
			m.status.Update(m.name, "Running")

			// One collection per tick; metrics from all sources below are
			// accumulated into c.Metrics.
			c := &mm.Collection{
				ServiceInstance: proto.ServiceInstance{
					Service:    m.config.Service,
					InstanceId: m.config.InstanceId,
				},
				Ts:      now.UTC().Unix(),
				Metrics: []mm.Metric{},
			}

			// Start timing the collection. If must take < collectLimit else
			// it's discarded.
			start := time.Now()
			conn := m.conn.DB()

			// SHOW GLOBAL STATUS
			if err := m.GetShowStatusMetrics(conn, c); err != nil {
				if m.collectError(err) == networkError {
					// Connection lost; skip this interval and wait for reconnect.
					connected = false
					continue
				}
			}

			// SELECT NAME, ... FROM INFORMATION_SCHEMA.INNODB_METRICS
			if len(m.config.InnoDB) > 0 {
				if err := m.GetInnoDBMetrics(conn, c); err != nil {
					switch m.collectError(err) {
					case accessDenied:
						// No privilege for this table; disable so we don't
						// retry (and re-fail) every tick.
						m.config.InnoDB = []string{}
					case networkError:
						connected = false
						continue
					}
				}
			}

			if m.config.UserStats {
				// SELECT ... FROM INFORMATION_SCHEMA.TABLE_STATISTICS
				if err := m.getTableUserStats(conn, c, m.config.UserStatsIgnoreDb); err != nil {
					switch m.collectError(err) {
					case accessDenied:
						// No privilege; disable user stats for future ticks.
						m.config.UserStats = false
					case networkError:
						connected = false
						continue
					}
				}
				// SELECT ... FROM INFORMATION_SCHEMA.INDEX_STATISTICS
				if err := m.getIndexUserStats(conn, c, m.config.UserStatsIgnoreDb); err != nil {
					switch m.collectError(err) {
					case accessDenied:
						m.config.UserStats = false
					case networkError:
						connected = false
						continue
					}
				}
			}

			// It is possible that collecting metrics will stall for many
			// seconds for some reason so even though we issued captures 1 sec in
			// between, we actually got 5 seconds between results and as such we
			// might be showing huge spike.
			// To avoid that, if the time to collect metrics is >= collectLimit
			// then warn and discard the metrics.
			diff := time.Now().Sub(start).Seconds()
			if diff >= m.collectLimit {
				lastError = fmt.Sprintf("Skipping interval because it took too long to collect: %.2fs >= %.2fs", diff, m.collectLimit)
				m.logger.Warn(lastError)
				continue
			}

			// Send the metrics to an mm.Aggregator.
			m.status.Update(m.name, "Sending metrics")
			if len(c.Metrics) > 0 {
				select {
				case m.collectionChan <- c:
					lastTs = c.Ts
					lastError = ""
				case <-time.After(500 * time.Millisecond):
					// lost collection
					// Don't block the tick loop if the aggregator is stuck;
					// drop this interval's metrics instead.
					m.logger.Debug("Lost MySQL metrics; timeout spooling after 500ms")
					lastError = "Spool timeout"
				}
			} else {
				m.logger.Debug("run:no metrics") // shouldn't happen
				lastError = "No metrics"
			}
			m.logger.Debug("run:collect:stop")
		case connected = <-m.connectedChan:
			// The connect goroutine reports its result here.
			m.logger.Debug("run:connected:true")
			m.status.Update(m.name, "Ready")
		case <-m.restartChan:
			// MySQL restarted (or connection dropped); reconnect asynchronously.
			m.logger.Debug("run:mysql:restart")
			connected = false
			go m.connect(fmt.Errorf("Lost connection to MySQL, restarting"))
		case <-m.sync.StopChan:
			m.logger.Debug("run:stop")
			return
		}
	}
}
func (s *Sender) send() { s.logger.Debug("send:call") defer s.logger.Debug("send:return") sent := SentInfo{} defer func() { sent.End = time.Now() s.status.Update("data-sender", "Disconnecting") s.client.DisconnectOnce() // Stats for this run. s.lastStats.Sent(sent) r := s.lastStats.Report() report := fmt.Sprintf("at %s: %s", pct.TimeString(r.Begin), FormatSentReport(r)) s.status.Update("data-sender-last", report) s.logger.Info(report) // Stats for the last day. s.dailyStats.Sent(sent) r = s.dailyStats.Report() report = fmt.Sprintf("since %s: %s", pct.TimeString(r.Begin), FormatSentReport(r)) s.status.Update("data-sender-1d", report) s.status.Update("data-sender", "Idle") }() // Connect and send files until too many errors occur. startTime := time.Now() sent.Begin = startTime for sent.ApiErrs == 0 && sent.Errs < MAX_SEND_ERRORS && sent.Timeouts == 0 { // Check runtime, don't send forever. runTime := time.Now().Sub(startTime).Seconds() if uint(runTime) > s.timeout { sent.Timeouts++ s.logger.Warn(fmt.Sprintf("Timeout sending data: %.2fs > %ds", runTime, s.timeout)) return } // Connect to API, or retry. s.status.Update("data-sender", "Connecting") s.logger.Debug("send:connecting") if sent.Errs > 0 { time.Sleep(CONNECT_ERROR_WAIT * time.Second) } if err := s.client.ConnectOnce(10); err != nil { sent.Errs++ s.logger.Warn("Cannot connect to API: ", err) continue // retry } s.logger.Debug("send:connected") // Send all files, or stop on error or timeout. if err := s.sendAllFiles(startTime, &sent); err != nil { sent.Errs++ s.logger.Warn(err) s.client.DisconnectOnce() continue // error sending files, re-connect and try again } return // success or API error, either way, stop sending } }