// processStatResults processes the given results, creating and setting DSStats, LastStats, and other stats. Note this is NOT threadsafe, and MUST NOT be called from multiple threads. func processStatResults( results []cache.Result, statHistoryThreadsafe StatHistoryThreadsafe, combinedStates peer.Crstates, lastStats LastStatsThreadsafe, toData todata.TOData, errorCount UintThreadsafe, dsStats DSStatsThreadsafe, lastStatEndTimes map[enum.CacheName]time.Time, lastStatDurationsThreadsafe DurationMapThreadsafe, unpolledCaches UnpolledCachesThreadsafe, ) { statHistory := statHistoryThreadsafe.Get().Copy() maxStats := statHistoryThreadsafe.Max() for _, result := range results { // TODO determine if we want to add results with errors, or just print the errors now and don't add them. statHistory[enum.CacheName(result.Id)] = pruneHistory(append(statHistory[enum.CacheName(result.Id)], result), maxStats) } statHistoryThreadsafe.Set(statHistory) for _, result := range results { log.Debugf("poll %v %v CreateStats start\n", result.PollID, time.Now()) } newDsStats, newLastStats, err := ds.CreateStats(statHistory, toData, combinedStates, lastStats.Get().Copy(), time.Now()) for _, result := range results { log.Debugf("poll %v %v CreateStats end\n", result.PollID, time.Now()) } if err != nil { errorCount.Inc() log.Errorf("getting deliveryservice: %v\n", err) } else { dsStats.Set(newDsStats) lastStats.Set(newLastStats) } endTime := time.Now() lastStatDurations := lastStatDurationsThreadsafe.Get().Copy() for _, result := range results { if lastStatStart, ok := lastStatEndTimes[enum.CacheName(result.Id)]; ok { d := time.Since(lastStatStart) lastStatDurations[enum.CacheName(result.Id)] = d } lastStatEndTimes[enum.CacheName(result.Id)] = endTime // log.Debugf("poll %v %v statfinish\n", result.PollID, endTime) result.PollFinished <- result.PollID } lastStatDurationsThreadsafe.Set(lastStatDurations) unpolledCaches.SetPolled(results, lastStats) }
// processHealthResult processes the given health results, adding their stats to the CacheAvailableStatus. Note this is NOT threadsafe, because it non-atomically gets CacheAvailableStatuses, Events, LastHealthDurations and later updates them. This MUST NOT be called from multiple threads. func processHealthResult(cacheHealthChan <-chan cache.Result, toData todata.TODataThreadsafe, localStates peer.CRStatesThreadsafe, lastHealthDurationsThreadsafe DurationMapThreadsafe, statHistory StatHistoryThreadsafe, monitorConfig TrafficMonitorConfigMapThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, fetchCount UintThreadsafe, errorCount UintThreadsafe, events EventsThreadsafe, localCacheStatusThreadsafe CacheAvailableStatusThreadsafe, lastHealthEndTimes map[enum.CacheName]time.Time, healthHistory map[enum.CacheName][]cache.Result, results []cache.Result, cfg config.Config) { if len(results) == 0 { return } toDataCopy := toData.Get() // create a copy, so the same data used for all processing of this cache health result localCacheStatus := localCacheStatusThreadsafe.Get().Copy() monitorConfigCopy := monitorConfig.Get() for _, healthResult := range results { log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now()) fetchCount.Inc() var prevResult cache.Result healthResultHistory := healthHistory[enum.CacheName(healthResult.Id)] // healthResultHistory := healthHistory.Get(enum.CacheName(healthResult.Id)) if len(healthResultHistory) != 0 { prevResult = healthResultHistory[len(healthResultHistory)-1] } health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy) // healthHistory.Set(enum.CacheName(healthResult.Id), pruneHistory(append(healthHistory.Get(enum.CacheName(healthResult.Id)), healthResult), defaultMaxHistory)) healthHistory[enum.CacheName(healthResult.Id)] = pruneHistory(append(healthHistory[enum.CacheName(healthResult.Id)], healthResult), cfg.MaxHealthHistory) isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfigCopy) if localStates.Get().Caches[healthResult.Id].IsAvailable != isAvailable { log.Infof("Changing state for %s was: %t now: %t because %s errors: %v", healthResult.Id, prevResult.Available, isAvailable, whyAvailable, healthResult.Errors) events.Add(Event{Time: time.Now().Unix(), Description: whyAvailable, Name: healthResult.Id, Hostname: healthResult.Id, Type: toDataCopy.ServerTypes[healthResult.Id].String(), Available: isAvailable}) } localCacheStatus[healthResult.Id] = CacheAvailableStatus{Available: isAvailable, Status: monitorConfigCopy.TrafficServer[string(healthResult.Id)].Status} // TODO move within localStates? localStates.SetCache(healthResult.Id, peer.IsAvailable{IsAvailable: isAvailable}) log.Debugf("poll %v %v calculateDeliveryServiceState start\n", healthResult.PollID, time.Now()) calculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates) log.Debugf("poll %v %v calculateDeliveryServiceState end\n", healthResult.PollID, time.Now()) } localCacheStatusThreadsafe.Set(localCacheStatus) // TODO determine if we should combineCrStates() here lastHealthDurations := lastHealthDurationsThreadsafe.Get().Copy() for _, healthResult := range results { if lastHealthStart, ok := lastHealthEndTimes[enum.CacheName(healthResult.Id)]; ok { d := time.Since(lastHealthStart) lastHealthDurations[enum.CacheName(healthResult.Id)] = d } lastHealthEndTimes[enum.CacheName(healthResult.Id)] = time.Now() log.Debugf("poll %v %v finish\n", healthResult.PollID, time.Now()) healthResult.PollFinished <- healthResult.PollID } lastHealthDurationsThreadsafe.Set(lastHealthDurations) }
func (f HttpFetcher) Fetch(id string, url string, pollId uint64, pollFinishedChan chan<- uint64) { log.Debugf("poll %v %v fetch start\n", pollId, time.Now()) req, err := http.NewRequest("GET", url, nil) // TODO: change this to use f.Headers. -jse req.Header.Set("User-Agent", "traffic_monitor/1.0") // TODO change to 2.0? req.Header.Set("Connection", "keep-alive") if f.Pending != nil { f.Pending.Inc() } response, err := f.Client.Do(req) if f.Pending != nil { f.Pending.Dec() } defer func() { if response != nil && response.Body != nil { ioutil.ReadAll(response.Body) // TODO determine if necessary response.Body.Close() } }() if err == nil && response == nil { err = fmt.Errorf("err nil and response nil") } if err == nil && response != nil && (response.StatusCode < 200 || response.StatusCode > 299) { err = fmt.Errorf("bad status: %v", response.StatusCode) } if err != nil { err = fmt.Errorf("fetch error: %v", err) } if err == nil && response != nil { if f.Success != nil { f.Success.Inc() } log.Debugf("poll %v %v fetch end\n", pollId, time.Now()) f.Handler.Handle(id, response.Body, err, pollId, pollFinishedChan) } else { if f.Fail != nil { f.Fail.Inc() } f.Handler.Handle(id, nil, err, pollId, pollFinishedChan) } }
func (handler Handler) Handle(id string, r io.Reader, err error, pollId uint64, pollFinished chan<- uint64) { log.Debugf("poll %v %v handle start\n", pollId, time.Now()) result := Result{ Id: enum.CacheName(id), Available: false, Errors: []error{}, Time: time.Now(), // TODO change this to be computed the instant we get the result back, to minimise inaccuracy PollID: pollId, PollFinished: pollFinished, } if err != nil { log.Errorf("%v handler given error '%v'\n", id, err) // error here, in case the thing that called Handle didn't error result.Errors = append(result.Errors, err) handler.ResultChannel <- result return } if r == nil { log.Errorf("%v handle reader nil\n", id) result.Errors = append(result.Errors, fmt.Errorf("handler got nil reader")) handler.ResultChannel <- result return } result.PrecomputedData.Reporting = true if err := json.NewDecoder(r).Decode(&result.Astats); err != nil { log.Errorf("%s procnetdev decode error '%v'\n", id, err) result.Errors = append(result.Errors, err) handler.ResultChannel <- result return } if result.Astats.System.ProcNetDev == "" { log.Warnf("addkbps %s procnetdev empty\n", id) } if result.Astats.System.InfSpeed == 0 { log.Warnf("addkbps %s inf.speed empty\n", id) } log.Debugf("poll %v %v handle decode end\n", pollId, time.Now()) if err != nil { result.Errors = append(result.Errors, err) log.Errorf("addkbps handle %s error '%v'\n", id, err) } else { result.Available = true } if handler.Precompute() { log.Debugf("poll %v %v handle precompute start\n", pollId, time.Now()) result = handler.precompute(result) log.Debugf("poll %v %v handle precompute end\n", pollId, time.Now()) } log.Debugf("poll %v %v handle write start\n", pollId, time.Now()) handler.ResultChannel <- result log.Debugf("poll %v %v handle end\n", pollId, time.Now()) }
// TODO iterationCount and/or p.TickChan? func pollHttp(interval time.Duration, id string, url string, fetcher fetcher.Fetcher, die <-chan struct{}) { tick := time.NewTicker(interval) lastTime := time.Now() for { select { case now := <-tick.C: realInterval := now.Sub(lastTime) if realInterval > interval+(time.Millisecond*100) { instr.TimerFail.Inc() log.Infof("Intended Duration: %v Actual Duration: %v\n", interval, realInterval) } lastTime = time.Now() pollId := atomic.AddUint64(&debugPollNum, 1) pollFinishedChan := make(chan uint64) log.Debugf("poll %v %v start\n", pollId, time.Now()) go fetcher.Fetch(id, url, pollId, pollFinishedChan) // TODO persist fetcher, with its own die chan? <-pollFinishedChan case <-die: return } } }
func addAvailableData(dsStats Stats, crStates peer.Crstates, serverCachegroups map[enum.CacheName]enum.CacheGroupName, serverDs map[enum.CacheName][]enum.DeliveryServiceName, serverTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result) (Stats, error) { for cache, available := range crStates.Caches { cacheGroup, ok := serverCachegroups[cache] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Cachegroups\n", cache) continue } deliveryServices, ok := serverDs[cache] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in DeliveryServices\n", cache) continue } cacheType, ok := serverTypes[enum.CacheName(cache)] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Server Types\n", cache) continue } for _, deliveryService := range deliveryServices { if deliveryService == "" { log.Errorf("EMPTY addAvailableData DS") // various bugs in other functions can cause this - this will help identify and debug them. continue } stat, ok := dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Stats\n", cache) continue // TODO log warning? Error? } if available.IsAvailable { // c.IsAvailable.Value stat.CommonStats.IsAvailable.Value = true stat.CommonStats.CachesAvailableNum.Value++ cacheGroupStats := stat.CacheGroups[enum.CacheGroupName(cacheGroup)] cacheGroupStats.IsAvailable.Value = true stat.CacheGroups[enum.CacheGroupName(cacheGroup)] = cacheGroupStats stat.TotalStats.IsAvailable.Value = true typeStats := stat.Types[cacheType] typeStats.IsAvailable.Value = true stat.Types[cacheType] = typeStats } // TODO fix nested ifs if results, ok := statHistory[enum.CacheName(cache)]; ok { if len(results) < 1 { log.Warnf("no results %v %v\n", cache, deliveryService) } else { result := results[0] if result.PrecomputedData.Reporting { stat.CommonStats.CachesReporting[enum.CacheName(cache)] = true } else { log.Debugf("no reporting %v %v\n", cache, deliveryService) } } } else { log.Debugf("no result for %v %v\n", cache, deliveryService) } dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] = stat // TODO Necessary? Remove? } } return dsStats, nil }
func createCacheStatuses( cacheTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result, lastHealthDurations map[enum.CacheName]time.Duration, cacheStates map[enum.CacheName]peer.IsAvailable, lastStats ds.LastStats, localCacheStatusThreadsafe CacheAvailableStatusThreadsafe, ) map[enum.CacheName]CacheStatus { conns := createCacheConnections(statHistory) statii := map[enum.CacheName]CacheStatus{} localCacheStatus := localCacheStatusThreadsafe.Get() for cacheName, cacheType := range cacheTypes { cacheStatHistory, ok := statHistory[cacheName] if !ok { log.Warnf("createCacheStatuses stat history missing cache %s\n", cacheName) continue } if len(cacheStatHistory) < 1 { log.Warnf("createCacheStatuses stat history empty for cache %s\n", cacheName) continue } log.Debugf("createCacheStatuses NOT empty for cache %s\n", cacheName) var loadAverage *float64 procLoadAvg := cacheStatHistory[0].Astats.System.ProcLoadavg if procLoadAvg != "" { firstSpace := strings.IndexRune(procLoadAvg, ' ') if firstSpace == -1 { log.Warnf("WARNING unexpected proc.loadavg '%s' for cache %s\n", procLoadAvg, cacheName) } else { loadAverageVal, err := strconv.ParseFloat(procLoadAvg[:firstSpace], 64) if err != nil { log.Warnf("proc.loadavg doesn't contain a float prefix '%s' for cache %s\n", procLoadAvg, cacheName) } else { loadAverage = &loadAverageVal } } } var queryTime *int64 queryTimeVal, ok := lastHealthDurations[cacheName] if !ok { log.Warnf("cache not in last health durations cache %s\n", cacheName) } else { queryTimeInt := int64(queryTimeVal / time.Millisecond) queryTime = &queryTimeInt } var kbps *float64 lastStat, ok := lastStats.Caches[enum.CacheName(cacheName)] if !ok { log.Warnf("cache not in last kbps cache %s\n", cacheName) } else { kbpsVal := lastStat.Bytes.PerSec / float64(ds.BytesPerKilobit) kbps = &kbpsVal } var connections *int64 connectionsVal, ok := conns[enum.CacheName(cacheName)] if !ok { log.Warnf("cache not in connections %s\n", cacheName) } else { connections = &connectionsVal } var status *string statusVal, ok := localCacheStatus[enum.CacheName(cacheName)] if !ok { log.Warnf("cache not in statuses %s\n", cacheName) } else { statusString := statusVal.Status + " - " if localCacheStatus[enum.CacheName(cacheName)].Available { statusString += "available" } else { statusString += "unavailable" } status = &statusString } cacheTypeStr := string(cacheType) statii[enum.CacheName(cacheName)] = CacheStatus{Type: &cacheTypeStr, LoadAverage: loadAverage, QueryTimeMilliseconds: queryTime, BandwidthKbps: kbps, ConnectionCount: connections, Status: status} } return statii }