func (handler Handler) Handle(id string, r io.Reader, err error, pollId uint64, pollFinished chan<- uint64) { log.Debugf("poll %v %v handle start\n", pollId, time.Now()) result := Result{ Id: id, Available: false, Errors: []error{}, Time: time.Now(), // TODO change this to be computed the instant we get the result back, to minimise inaccuracy PollID: pollId, PollFinished: pollFinished, } if err != nil { log.Errorf("%v handler given error '%v'\n", id, err) // error here, in case the thing that called Handle didn't error result.Errors = append(result.Errors, err) handler.ResultChannel <- result return } if r == nil { log.Errorf("%v handle reader nil\n", id) result.Errors = append(result.Errors, fmt.Errorf("handler got nil reader")) handler.ResultChannel <- result return } result.PrecomputedData.Reporting = true if err := json.NewDecoder(r).Decode(&result.Astats); err != nil { log.Errorf("%s procnetdev decode error '%v'\n", id, err) result.Errors = append(result.Errors, err) handler.ResultChannel <- result return } if result.Astats.System.ProcNetDev == "" { log.Warnf("addkbps %s procnetdev empty\n", id) } log.Debugf("poll %v %v handle decode end\n", pollId, time.Now()) if err != nil { result.Errors = append(result.Errors, err) log.Errorf("addkbps handle %s error '%v'\n", id, err) } else { result.Available = true } if handler.Precompute() { log.Debugf("poll %v %v handle precompute start\n", pollId, time.Now()) result = handler.precompute(result) log.Debugf("poll %v %v handle precompute end\n", pollId, time.Now()) } log.Debugf("poll %v %v handle write start\n", pollId, time.Now()) handler.ResultChannel <- result log.Debugf("poll %v %v handle end\n", pollId, time.Now()) }
func CreateStats(statHistory map[enum.CacheName][]cache.Result, toData todata.TOData, crStates peer.Crstates, lastKbpsStats StatsLastKbps, now time.Time) (Stats, StatsLastKbps, error) { start := time.Now() dsStats := NewStats() for deliveryService, _ := range toData.DeliveryServiceServers { if deliveryService == "" { log.Errorf("EMPTY CreateStats deliveryService") continue } dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] = *dsdata.NewStat() } dsStats = setStaticData(dsStats, toData.DeliveryServiceServers) var err error dsStats, err = addAvailableData(dsStats, crStates, toData.ServerCachegroups, toData.ServerDeliveryServices, toData.ServerTypes, statHistory) // TODO move after stat summarisation if err != nil { return dsStats, lastKbpsStats, fmt.Errorf("Error getting Cache availability data: %v", err) } for server, history := range statHistory { if len(history) < 1 { continue // TODO warn? } cachegroup, ok := toData.ServerCachegroups[server] if !ok { log.Warnf("server %s has no cachegroup, skipping\n", server) continue } serverType, ok := toData.ServerTypes[enum.CacheName(server)] if !ok { log.Warnf("server %s not in CRConfig, skipping\n", server) continue } result := history[len(history)-1] // TODO check result.PrecomputedData.Errors for ds, resultStat := range result.PrecomputedData.DeliveryServiceStats { if ds == "" { log.Errorf("EMPTY precomputed delivery service") continue } if _, ok := dsStats.DeliveryService[ds]; !ok { dsStats.DeliveryService[ds] = resultStat continue } httpDsStat := dsStats.DeliveryService[ds] httpDsStat.Total = httpDsStat.Total.Sum(resultStat.Total) httpDsStat.CacheGroups[cachegroup] = httpDsStat.CacheGroups[cachegroup].Sum(resultStat.CacheGroups[cachegroup]) httpDsStat.Type[serverType] = httpDsStat.Type[serverType].Sum(resultStat.Type[serverType]) dsStats.DeliveryService[ds] = httpDsStat // TODO determine if necessary } } kbpsStats, kbpsStatsLastKbps, kbpsErr := addKbps(statHistory, dsStats, lastKbpsStats, now) log.Infof("CreateStats took %v\n", time.Since(start)) return kbpsStats, kbpsStatsLastKbps, kbpsErr }
// TODO remove change subscribers, give Threadsafes directly to the things that need them. If they only set vars, and don't actually do work on change. func opsConfigManagerListen(opsConfig OpsConfigThreadsafe, opsConfigChannel <-chan handler.OpsConfig, dr chan<- http_server.DataRequest, toSession towrap.ITrafficOpsSession, toData todata.TODataThreadsafe, opsConfigChangeSubscribers []chan<- handler.OpsConfig, toChangeSubscribers []chan<- towrap.ITrafficOpsSession) { httpServer := http_server.Server{} errorCount := 0 // TODO make threadsafe and a pointer to errorcount in the main manager? for { select { case newOpsConfig := <-opsConfigChannel: var err error opsConfig.Set(newOpsConfig) listenAddress := ":80" // default if newOpsConfig.HttpListener != "" { listenAddress = newOpsConfig.HttpListener } handleErr := func(err error) { errorCount++ log.Errorf("OpsConfigManager: %v\n", err) } err = httpServer.Run(dr, listenAddress) if err != nil { handleErr(fmt.Errorf("MonitorConfigPoller: error creating HTTP server: %s\n", err)) continue } realToSession, err := to.Login(newOpsConfig.Url, newOpsConfig.Username, newOpsConfig.Password, newOpsConfig.Insecure) if err != nil { handleErr(fmt.Errorf("MonitorConfigPoller: error instantiating Session with traffic_ops: %s\n", err)) continue } toSession = towrap.NewTrafficOpsSessionThreadsafe(realToSession) if err := toData.Fetch(toSession, newOpsConfig.CdnName); err != nil { handleErr(fmt.Errorf("Error getting Traffic Ops data: %v\n", err)) continue } // These must be in a goroutine, because the monitorConfigPoller tick sends to a channel this select listens for. Thus, if we block on sends to the monitorConfigPoller, we have a livelock race condition. // More generically, we're using goroutines as an infinite chan buffer, to avoid potential livelocks for _, subscriber := range opsConfigChangeSubscribers { go func() { subscriber <- newOpsConfig // this is needed for cdnName }() } for _, subscriber := range toChangeSubscribers { go func() { subscriber <- toSession }() } } } }
// precompute does the calculations which are possible with only this one cache result. func (handler Handler) precompute(result Result) Result { todata := handler.ToData.Get() stats := map[enum.DeliveryServiceName]dsdata.Stat{} var err error if result.PrecomputedData.OutBytes, err = outBytes(result.Astats.System.ProcNetDev, result.Astats.System.InfName); err != nil { result.PrecomputedData.OutBytes = 0 log.Errorf("addkbps %s handle precomputing outbytes '%v'\n", result.Id, err) } for stat, value := range result.Astats.Ats { var err error stats, err = processStat(result.Id, stats, todata, stat, value) if err != nil && err != dsdata.ErrNotProcessedStat { log.Errorf("precomputing cache %v stat %v value %v error %v", result.Id, stat, value, err) result.PrecomputedData.Errors = append(result.PrecomputedData.Errors, err) } } result.PrecomputedData.DeliveryServiceStats = stats return result }
func dataRequestManagerListen(dr <-chan http_server.DataRequest, opsConfig OpsConfigThreadsafe, toSession towrap.ITrafficOpsSession, localStates peer.CRStatesThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, statHistory StatHistoryThreadsafe, dsStats DSStatsThreadsafe, events EventsThreadsafe, staticAppData StaticAppData, healthPollInterval time.Duration, lastHealthDurations DurationMapThreadsafe, fetchCount UintThreadsafe, healthIteration UintThreadsafe, errorCount UintThreadsafe, toData todata.TODataThreadsafe, localCacheStatus CacheAvailableStatusThreadsafe, lastKbpsStats StatsLastKbpsThreadsafe) { for { select { case req := <-dr: defer close(req.Response) var body []byte var err error switch req.Type { case http_server.TRConfig: cdnName := opsConfig.Get().CdnName if toSession == nil { err = fmt.Errorf("Unable to connect to Traffic Ops") } else if cdnName == "" { err = fmt.Errorf("No CDN Configured") } else { body, err = toSession.CRConfigRaw(cdnName) } if err != nil { err = fmt.Errorf("TR Config: %v", err) } case http_server.TRStateDerived: body, err = peer.CrstatesMarshall(combinedStates.Get()) if err != nil { err = fmt.Errorf("TR State (derived): %v", err) } case http_server.TRStateSelf: body, err = peer.CrstatesMarshall(localStates.Get()) if err != nil { err = fmt.Errorf("TR State (self): %v", err) } case http_server.CacheStats: // TODO: add support for ?hc=N query param, stats=, wildcard, individual caches // add pp and date to the json: /* pp: "0=[my-ats-edge-cache-1], hc=[1]", date: "Thu Oct 09 20:28:36 UTC 2014" */ params := req.Parameters hc := 1 if _, exists := params["hc"]; exists { v, err := strconv.Atoi(params["hc"][0]) if err == nil { hc = v } } body, err = cache.StatsMarshall(statHistory.Get(), hc) if err != nil { err = fmt.Errorf("CacheStats: %v", err) } case http_server.DSStats: body, err = json.Marshal(ds.StatsJSON(dsStats.Get())) // TODO marshall beforehand, for performance? (test to see how often requests are made) if err != nil { err = fmt.Errorf("DsStats: %v", err) } case http_server.EventLog: body, err = json.Marshal(JSONEvents{Events: events.Get()}) if err != nil { err = fmt.Errorf("EventLog: %v", err) } case http_server.PeerStates: body, err = json.Marshal(createApiPeerStates(peerStates.Get())) case http_server.StatSummary: body = []byte("TODO implement") case http_server.Stats: body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get()) if err != nil { err = fmt.Errorf("Stats: %v", err) } case http_server.ConfigDoc: opsConfigCopy := opsConfig.Get() // if the password is blank, leave it blank, so callers can see it's missing. if opsConfigCopy.Password != "" { opsConfigCopy.Password = "******" } body, err = json.Marshal(opsConfigCopy) if err != nil { err = fmt.Errorf("Config Doc: %v", err) } case http_server.APICacheCount: // TODO determine if this should use peerStates body = []byte(strconv.Itoa(len(localStates.Get().Caches))) case http_server.APICacheAvailableCount: body = []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches))) case http_server.APICacheDownCount: body = []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches))) case http_server.APIVersion: s := "traffic_monitor-" + staticAppData.Version + "." if len(staticAppData.GitRevision) > 6 { s += staticAppData.GitRevision[:6] } else { s += staticAppData.GitRevision } body = []byte(s) case http_server.APITrafficOpsURI: body = []byte(opsConfig.Get().Url) case http_server.APICacheStates: body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(), lastHealthDurations.Get(), localStates.Get().Caches, lastKbpsStats.Get(), localCacheStatus)) case http_server.APIBandwidthKbps: serverTypes := toData.Get().ServerTypes kbpsStats := lastKbpsStats.Get() sum := float64(0.0) for cache, data := range kbpsStats.Caches { if serverTypes[cache] != enum.CacheTypeEdge { continue } sum += data.Kbps } body = []byte(fmt.Sprintf("%f", sum)) default: err = fmt.Errorf("Unknown Request Type: %v", req.Type) } if err != nil { errorCount.Inc() log.Errorf("Request Error: %v\n", err) } else { req.Response <- body } } } }
func addAvailableData(dsStats Stats, crStates peer.Crstates, serverCachegroups map[enum.CacheName]enum.CacheGroupName, serverDs map[string][]string, serverTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result) (Stats, error) { for cache, available := range crStates.Caches { cacheGroup, ok := serverCachegroups[enum.CacheName(cache)] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Cachegroups\n", cache) continue } deliveryServices, ok := serverDs[cache] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in DeliveryServices\n", cache) continue } cacheType, ok := serverTypes[enum.CacheName(cache)] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Server Types\n", cache) continue } for _, deliveryService := range deliveryServices { if deliveryService == "" { log.Errorf("EMPTY addAvailableData DS") // various bugs in other functions can cause this - this will help identify and debug them. continue } stat, ok := dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] if !ok { log.Warnf("CreateStats not adding availability data for '%s': not found in Stats\n", cache) continue // TODO log warning? Error? } if available.IsAvailable { // c.IsAvailable.Value stat.Common.IsAvailable.Value = true stat.Common.CachesAvailable.Value++ cacheGroupStats := stat.CacheGroups[enum.CacheGroupName(cacheGroup)] cacheGroupStats.IsAvailable.Value = true stat.CacheGroups[enum.CacheGroupName(cacheGroup)] = cacheGroupStats stat.Total.IsAvailable.Value = true typeStats := stat.Type[cacheType] typeStats.IsAvailable.Value = true stat.Type[cacheType] = typeStats } // TODO fix nested ifs if results, ok := statHistory[enum.CacheName(cache)]; ok { if len(results) < 1 { log.Warnf("no results %v %v\n", cache, deliveryService) } else { result := results[0] if result.PrecomputedData.Reporting { stat.Common.CachesReporting[enum.CacheName(cache)] = true } else { log.Debugf("no reporting %v %v\n", cache, deliveryService) } } } else { log.Debugf("no result for %v %v\n", cache, deliveryService) } dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] = stat // TODO Necessary? Remove? } } return dsStats, nil }
// addKbps adds Kbps fields to the NewStats, based on the previous out_bytes in the oldStats, and the time difference. // // Traffic Server only updates its data every N seconds. So, often we get a new Stats with the same OutBytes as the previous one, // So, we must record the last changed value, and the time it changed. Then, if the new OutBytes is different from the previous, // we set the (new - old) / lastChangedTime as the KBPS, and update the recorded LastChangedTime and LastChangedValue // // This specifically returns the given dsStats and lastKbpsStats on error, so it's safe to do persistentStats, persistentLastKbpsStats, err = addKbps(...) // TODO handle ATS byte rolling (when the `out_bytes` overflows back to 0) func addKbps(statHistory map[enum.CacheName][]cache.Result, dsStats Stats, lastKbpsStats StatsLastKbps, dsStatsTime time.Time) (Stats, StatsLastKbps, error) { for dsName, stat := range dsStats.DeliveryService { lastKbpsStat, lastKbpsStatExists := lastKbpsStats.DeliveryServices[dsName] if !lastKbpsStatExists { lastKbpsStat = newStatLastKbps() } for cgName, cacheStats := range stat.CacheGroups { lastKbpsData, _ := lastKbpsStat.CacheGroups[cgName] if cacheStats.OutBytes.Value == lastKbpsData.Bytes { cacheStats.Kbps.Value = lastKbpsData.Kbps stat.CacheGroups[cgName] = cacheStats continue } if lastKbpsStatExists && lastKbpsData.Bytes != 0 { cacheStats.Kbps.Value = float64(cacheStats.OutBytes.Value-lastKbpsData.Bytes) / dsStatsTime.Sub(lastKbpsData.Time).Seconds() } if cacheStats.Kbps.Value < 0 { cacheStats.Kbps.Value = 0 log.Errorf("addkbps negative cachegroup cacheStats.Kbps.Value: '%v' '%v' %v - %v / %v\n", dsName, cgName, cacheStats.OutBytes.Value, lastKbpsData.Bytes, dsStatsTime.Sub(lastKbpsData.Time).Seconds()) } lastKbpsStat.CacheGroups[cgName] = LastKbpsData{Time: dsStatsTime, Bytes: cacheStats.OutBytes.Value, Kbps: cacheStats.Kbps.Value} stat.CacheGroups[cgName] = cacheStats } for cacheType, cacheStats := range stat.Type { lastKbpsData, _ := lastKbpsStat.Type[cacheType] if cacheStats.OutBytes.Value == lastKbpsData.Bytes { if cacheStats.OutBytes.Value == lastKbpsData.Bytes { if lastKbpsData.Kbps < 0 { log.Errorf("addkbps negative cachetype cacheStats.Kbps.Value!\n") lastKbpsData.Kbps = 0 } cacheStats.Kbps.Value = lastKbpsData.Kbps stat.Type[cacheType] = cacheStats continue } if lastKbpsStatExists && lastKbpsData.Bytes != 0 { cacheStats.Kbps.Value = float64(cacheStats.OutBytes.Value-lastKbpsData.Bytes) / dsStatsTime.Sub(lastKbpsData.Time).Seconds() } if cacheStats.Kbps.Value < 0 { log.Errorf("addkbps negative cachetype cacheStats.Kbps.Value.\n") cacheStats.Kbps.Value = 0 } lastKbpsStat.Type[cacheType] = LastKbpsData{Time: dsStatsTime, Bytes: cacheStats.OutBytes.Value, Kbps: cacheStats.Kbps.Value} stat.Type[cacheType] = cacheStats } } totalChanged := lastKbpsStat.Total.Bytes != stat.Total.OutBytes.Value if lastKbpsStatExists && lastKbpsStat.Total.Bytes != 0 && totalChanged { stat.Total.Kbps.Value = float64(stat.Total.OutBytes.Value-lastKbpsStat.Total.Bytes) / dsStatsTime.Sub(lastKbpsStat.Total.Time).Seconds() / BytesPerKbps if stat.Total.Kbps.Value < 0 { stat.Total.Kbps.Value = 0 log.Errorf("addkbps negative stat.Total.Kbps.Value! Deliveryservice '%v' %v - %v / %v\n", dsName, stat.Total.OutBytes.Value, lastKbpsStat.Total.Bytes, dsStatsTime.Sub(lastKbpsStat.Total.Time).Seconds()) } } else { stat.Total.Kbps.Value = lastKbpsStat.Total.Kbps } if totalChanged { lastKbpsStat.Total = LastKbpsData{Time: dsStatsTime, Bytes: stat.Total.OutBytes.Value, Kbps: stat.Total.Kbps.Value} } lastKbpsStats.DeliveryServices[dsName] = lastKbpsStat dsStats.DeliveryService[dsName] = stat } for cacheName, results := range statHistory { var result *cache.Result for _, r := range results { // result.Errors can include stat errors where OutBytes was set correctly, so we look for the first non-zero OutBytes rather than the first errorless result // TODO add error classes to PrecomputedData, to distinguish stat errors from HTTP errors? if r.PrecomputedData.OutBytes == 0 { continue } result = &r break } if result == nil { log.Warnf("addkbps cache %v has no results\n", cacheName) continue } outBytes := result.PrecomputedData.OutBytes lastCacheKbpsData, ok := lastKbpsStats.Caches[cacheName] if !ok { // this means this is the first result for this cache - this is a normal condition lastKbpsStats.Caches[cacheName] = LastKbpsData{Time: dsStatsTime, Bytes: outBytes, Kbps: 0} continue } if lastCacheKbpsData.Bytes == outBytes { // this means this ATS hasn't updated its byte count yet - this is a normal condition continue // don't try to kbps, and importantly don't change the time of the last change, if Traffic Server hasn't updated } if outBytes == 0 { log.Errorf("addkbps %v outbytes zero\n", cacheName) continue } kbps := float64(outBytes-lastCacheKbpsData.Bytes) / result.Time.Sub(lastCacheKbpsData.Time).Seconds() / BytesPerKbps if lastCacheKbpsData.Bytes == 0 { kbps = 0 log.Errorf("addkbps cache %v lastCacheKbpsData.Bytes zero\n", cacheName) } if kbps < 0 { log.Errorf("addkbps negative cache kbps: cache %v kbps %v outBytes %v lastCacheKbpsData.Bytes %v dsStatsTime %v lastCacheKbpsData.Time %v\n", cacheName, kbps, outBytes, lastCacheKbpsData.Bytes, dsStatsTime, lastCacheKbpsData.Time) // this is almost certainly a code bug. The only case this would ever be a data issue, would be if Traffic Server returned fewer bytes than previously. kbps = 0 } lastKbpsStats.Caches[cacheName] = LastKbpsData{Time: result.Time, Bytes: outBytes, Kbps: kbps} } return dsStats, lastKbpsStats, nil }