// processStatResults processes the given results, creating and setting DSStats, LastStats, and other stats.
// Note this is NOT threadsafe, and MUST NOT be called from multiple threads.
func processStatResults(
	results []cache.Result,
	statHistoryThreadsafe StatHistoryThreadsafe,
	combinedStates peer.Crstates,
	lastStats LastStatsThreadsafe,
	toData todata.TOData,
	errorCount UintThreadsafe,
	dsStats DSStatsThreadsafe,
	lastStatEndTimes map[enum.CacheName]time.Time,
	lastStatDurationsThreadsafe DurationMapThreadsafe,
	unpolledCaches UnpolledCachesThreadsafe,
) {
	statHistory := statHistoryThreadsafe.Get().Copy()
	maxStats := statHistoryThreadsafe.Max()
	for _, result := range results {
		// TODO determine if we want to add results with errors, or just print the errors now and don't add them.
		statHistory[enum.CacheName(result.Id)] = pruneHistory(append(statHistory[enum.CacheName(result.Id)], result), maxStats)
	}
	statHistoryThreadsafe.Set(statHistory)

	for _, result := range results {
		log.Debugf("poll %v %v CreateStats start\n", result.PollID, time.Now())
	}

	newDsStats, newLastStats, err := ds.CreateStats(statHistory, toData, combinedStates, lastStats.Get().Copy(), time.Now())

	for _, result := range results {
		log.Debugf("poll %v %v CreateStats end\n", result.PollID, time.Now())
	}

	if err != nil {
		errorCount.Inc()
		log.Errorf("getting deliveryservice: %v\n", err)
	} else {
		dsStats.Set(newDsStats)
		lastStats.Set(newLastStats)
	}

	endTime := time.Now()
	lastStatDurations := lastStatDurationsThreadsafe.Get().Copy()
	for _, result := range results {
		if lastStatStart, ok := lastStatEndTimes[enum.CacheName(result.Id)]; ok {
			d := time.Since(lastStatStart)
			lastStatDurations[enum.CacheName(result.Id)] = d
		}
		lastStatEndTimes[enum.CacheName(result.Id)] = endTime

		// log.Debugf("poll %v %v statfinish\n", result.PollID, endTime)
		result.PollFinished <- result.PollID
	}
	lastStatDurationsThreadsafe.Set(lastStatDurations)
	unpolledCaches.SetPolled(results, lastStats)
}
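// Illustrative sketch (not part of the original source): processStatResults relies on the
// copy-on-write convention of the *Threadsafe wrappers - Get() the shared value, Copy() it,
// mutate the copy, then Set() it back so readers never see a half-updated map. A minimal
// hypothetical wrapper for a duration map, assuming sync.RWMutex guarding, might look like:
type durationMapThreadsafeSketch struct {
	m   map[enum.CacheName]time.Duration
	mtx *sync.RWMutex
}

// Get returns the shared map; callers must treat it as read-only and copy before mutating.
func (t *durationMapThreadsafeSketch) Get() map[enum.CacheName]time.Duration {
	t.mtx.RLock()
	defer t.mtx.RUnlock()
	return t.m
}

// Set replaces the map wholesale; readers still holding the old map are unaffected.
func (t *durationMapThreadsafeSketch) Set(m map[enum.CacheName]time.Duration) {
	t.mtx.Lock()
	defer t.mtx.Unlock()
	t.m = m
}

// copyDurationMapSketch is the hypothetical Copy() step: clone before mutating.
func copyDurationMapSketch(m map[enum.CacheName]time.Duration) map[enum.CacheName]time.Duration {
	c := make(map[enum.CacheName]time.Duration, len(m))
	for k, v := range m {
		c[k] = v
	}
	return c
}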
func (handler Handler) Handle(id string, r io.Reader, err error, pollId uint64, pollFinished chan<- uint64) {
	log.Debugf("poll %v %v handle start\n", pollId, time.Now())
	result := Result{
		Id:           enum.CacheName(id),
		Available:    false,
		Errors:       []error{},
		Time:         time.Now(), // TODO change this to be computed the instant we get the result back, to minimise inaccuracy
		PollID:       pollId,
		PollFinished: pollFinished,
	}

	if err != nil {
		log.Errorf("%v handler given error '%v'\n", id, err) // error here, in case the thing that called Handle didn't error
		result.Errors = append(result.Errors, err)
		handler.ResultChannel <- result
		return
	}

	if r == nil {
		log.Errorf("%v handle reader nil\n", id)
		result.Errors = append(result.Errors, fmt.Errorf("handler got nil reader"))
		handler.ResultChannel <- result
		return
	}

	result.PrecomputedData.Reporting = true

	if err := json.NewDecoder(r).Decode(&result.Astats); err != nil {
		log.Errorf("%s procnetdev decode error '%v'\n", id, err)
		result.Errors = append(result.Errors, err)
		handler.ResultChannel <- result
		return
	}

	if result.Astats.System.ProcNetDev == "" {
		log.Warnf("addkbps %s procnetdev empty\n", id)
	}

	if result.Astats.System.InfSpeed == 0 {
		log.Warnf("addkbps %s inf.speed empty\n", id)
	}

	log.Debugf("poll %v %v handle decode end\n", pollId, time.Now())

	if err != nil {
		result.Errors = append(result.Errors, err)
		log.Errorf("addkbps handle %s error '%v'\n", id, err)
	} else {
		result.Available = true
	}

	if handler.Precompute() {
		log.Debugf("poll %v %v handle precompute start\n", pollId, time.Now())
		result = handler.precompute(result)
		log.Debugf("poll %v %v handle precompute end\n", pollId, time.Now())
	}

	log.Debugf("poll %v %v handle write start\n", pollId, time.Now())
	handler.ResultChannel <- result
	log.Debugf("poll %v %v handle end\n", pollId, time.Now())
}
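// Illustrative sketch (not from the original source): Handle decodes an astats-style JSON
// payload into result.Astats, which is where System.ProcNetDev, System.InfSpeed, and
// System.ProcLoadavg (used elsewhere) come from. The struct shape and JSON field names below
// are assumptions for illustration, not taken from this codebase:
type astatsSystemSketch struct {
	InfSpeed    int    `json:"inf.speed"`
	ProcNetDev  string `json:"proc.net.dev"`
	ProcLoadavg string `json:"proc.loadavg"`
}

type astatsSketch struct {
	Ats    map[string]interface{} `json:"ats"`
	System astatsSystemSketch     `json:"system"`
}

// decodeAstatsSketch mirrors the json.NewDecoder(r).Decode(&result.Astats) call above.
func decodeAstatsSketch(r io.Reader) (astatsSketch, error) {
	var a astatsSketch
	err := json.NewDecoder(r).Decode(&a)
	return a, err
}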
func CreateStats(statHistory map[enum.CacheName][]cache.Result, toData todata.TOData, crStates peer.Crstates, lastStats LastStats, now time.Time) (Stats, LastStats, error) {
	start := time.Now()
	dsStats := NewStats()
	for deliveryService, _ := range toData.DeliveryServiceServers {
		if deliveryService == "" {
			log.Errorf("EMPTY CreateStats deliveryService")
			continue
		}
		dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] = *dsdata.NewStat()
	}
	dsStats = setStaticData(dsStats, toData.DeliveryServiceServers)
	var err error
	dsStats, err = addAvailableData(dsStats, crStates, toData.ServerCachegroups, toData.ServerDeliveryServices, toData.ServerTypes, statHistory) // TODO move after stat summarisation
	if err != nil {
		return dsStats, lastStats, fmt.Errorf("Error getting Cache availability data: %v", err)
	}

	for server, history := range statHistory {
		if len(history) < 1 {
			continue // TODO warn?
		}
		cachegroup, ok := toData.ServerCachegroups[server]
		if !ok {
			log.Warnf("server %s has no cachegroup, skipping\n", server)
			continue
		}
		serverType, ok := toData.ServerTypes[enum.CacheName(server)]
		if !ok {
			log.Warnf("server %s not in CRConfig, skipping\n", server)
			continue
		}
		result := history[len(history)-1]

		// TODO check result.PrecomputedData.Errors
		for ds, resultStat := range result.PrecomputedData.DeliveryServiceStats {
			if ds == "" {
				log.Errorf("EMPTY precomputed delivery service")
				continue
			}

			if _, ok := dsStats.DeliveryService[ds]; !ok {
				dsStats.DeliveryService[ds] = resultStat
				continue
			}

			httpDsStat := dsStats.DeliveryService[ds]
			httpDsStat.TotalStats = httpDsStat.TotalStats.Sum(resultStat.TotalStats)
			httpDsStat.CacheGroups[cachegroup] = httpDsStat.CacheGroups[cachegroup].Sum(resultStat.CacheGroups[cachegroup])
			httpDsStat.Types[serverType] = httpDsStat.Types[serverType].Sum(resultStat.Types[serverType])
			httpDsStat.Caches[server] = httpDsStat.Caches[server].Sum(resultStat.Caches[server])
			httpDsStat.CachesTimeReceived[server] = resultStat.CachesTimeReceived[server]
			httpDsStat.CommonStats = dsStats.DeliveryService[ds].CommonStats
			dsStats.DeliveryService[ds] = httpDsStat // TODO determine if necessary
		}
	}

	perSecStats, lastStats := addPerSecStats(statHistory, dsStats, lastStats, now, toData.ServerCachegroups, toData.ServerTypes)
	log.Infof("CreateStats took %v\n", time.Since(start))
	return perSecStats, lastStats, nil
}
// getServerCachegroups gets the cachegroup of each ATS Edge+Mid Cache server, for the given CDN, from Traffic Ops.
// Returns a map[server]cachegroup.
func getServerCachegroups(crc CRConfig) (map[enum.CacheName]enum.CacheGroupName, error) {
	serverCachegroups := map[enum.CacheName]enum.CacheGroupName{}
	for server, serverData := range crc.ContentServers {
		serverCachegroups[enum.CacheName(server)] = enum.CacheGroupName(serverData.CacheGroup)
	}
	return serverCachegroups, nil
}
func processStatPluginRemapStats(server string, stats map[enum.DeliveryServiceName]dsdata.Stat, toData todata.TOData, stat string, statParts []string, value interface{}) (map[enum.DeliveryServiceName]dsdata.Stat, error) {
	if len(statParts) < 2 {
		return stats, fmt.Errorf("stat has no remap_stats deliveryservice and name parts")
	}

	fqdn := strings.Join(statParts[:len(statParts)-1], ".")

	ds, ok := toData.DeliveryServiceRegexes.DeliveryService(fqdn)
	if !ok {
		return stats, fmt.Errorf("ERROR no delivery service match for fqdn '%v' stat '%v'\n", fqdn, strings.Join(statParts, "."))
	}
	if ds == "" {
		return stats, fmt.Errorf("ERROR EMPTY delivery service fqdn %v stat %v\n", fqdn, strings.Join(statParts, "."))
	}

	statName := statParts[len(statParts)-1]

	dsStat, ok := stats[ds]
	if !ok {
		newStat := dsdata.NewStat()
		dsStat = *newStat
	}

	if err := addCacheStat(&dsStat.Total, statName, value); err != nil {
		return stats, err
	}

	cachegroup, ok := toData.ServerCachegroups[enum.CacheName(server)]
	if !ok {
		return stats, fmt.Errorf("server missing from TOData.ServerCachegroups") // TODO check logs, make sure this isn't normal
	}
	dsStat.CacheGroups[cachegroup] = dsStat.Total

	cacheType, ok := toData.ServerTypes[enum.CacheName(server)]
	if !ok {
		return stats, fmt.Errorf("server missing from TOData.ServerTypes")
	}
	dsStat.Type[cacheType] = dsStat.Total

	stats[ds] = dsStat
	return stats, nil
}
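// Illustrative sketch (not from the original source): processStatPluginRemapStats receives a
// remap stat already split on '.', treats every part except the last as the delivery service
// FQDN, and the last part as the stat name. The example inputs in the comments are hypothetical:
func splitRemapStatSketch(statParts []string) (fqdn string, statName string, err error) {
	if len(statParts) < 2 {
		return "", "", fmt.Errorf("stat has no remap_stats deliveryservice and name parts")
	}
	fqdn = strings.Join(statParts[:len(statParts)-1], ".") // e.g. "edge.example-ds.example.net"
	statName = statParts[len(statParts)-1]                 // e.g. "in_bytes"
	return fqdn, statName, nil
}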
// getServerTypes gets the cache type of each ATS Edge+Mid Cache server, for the given CDN, from Traffic Ops.
func getServerTypes(crc CRConfig) (map[enum.CacheName]enum.CacheType, error) {
	serverTypes := map[enum.CacheName]enum.CacheType{}

	for server, serverData := range crc.ContentServers {
		t := enum.CacheTypeFromString(serverData.Type)
		if t == enum.CacheTypeInvalid {
			return nil, fmt.Errorf("getServerTypes CRConfig unknown type for '%s': '%s'", server, serverData.Type)
		}
		serverTypes[enum.CacheName(server)] = t
	}
	return serverTypes, nil
}
func createApiPeerStates(peerStates map[string]peer.Crstates) ApiPeerStates {
	apiPeerStates := ApiPeerStates{Peers: map[enum.TrafficMonitorName]map[enum.CacheName][]CacheState{}}

	for peer, state := range peerStates {
		if _, ok := apiPeerStates.Peers[enum.TrafficMonitorName(peer)]; !ok {
			apiPeerStates.Peers[enum.TrafficMonitorName(peer)] = map[enum.CacheName][]CacheState{}
		}
		peerState := apiPeerStates.Peers[enum.TrafficMonitorName(peer)]
		for cache, available := range state.Caches {
			peerState[enum.CacheName(cache)] = []CacheState{CacheState{Value: available.IsAvailable}}
		}
		apiPeerStates.Peers[enum.TrafficMonitorName(peer)] = peerState
	}
	return apiPeerStates
}
func processHealthResult(cacheHealthChan <-chan cache.Result, toData todata.TODataThreadsafe, localStates peer.CRStatesThreadsafe, lastHealthDurations DurationMapThreadsafe, statHistory StatHistoryThreadsafe, monitorConfig TrafficMonitorConfigMapThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, fetchCount UintThreadsafe, errorCount UintThreadsafe, events EventsThreadsafe, localCacheStatus CacheAvailableStatusThreadsafe, lastHealthEndTimes map[enum.CacheName]time.Time, healthHistory map[enum.CacheName][]cache.Result, results []cache.Result) {
	if len(results) == 0 {
		return
	}
	toDataCopy := toData.Get()               // create a copy, so the same data used for all processing of this cache health result
	monitorConfigCopy := monitorConfig.Get() // copy now, so all calculations are on the same data

	for _, healthResult := range results {
		log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now())
		fetchCount.Inc()
		var prevResult cache.Result
		healthResultHistory := healthHistory[enum.CacheName(healthResult.Id)]
		// healthResultHistory := healthHistory.Get(enum.CacheName(healthResult.Id))
		if len(healthResultHistory) != 0 {
			prevResult = healthResultHistory[len(healthResultHistory)-1]
		}
		health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy)
		// healthHistory.Set(enum.CacheName(healthResult.Id), pruneHistory(append(healthHistory.Get(enum.CacheName(healthResult.Id)), healthResult), defaultMaxHistory))
		healthHistory[enum.CacheName(healthResult.Id)] = pruneHistory(append(healthHistory[enum.CacheName(healthResult.Id)], healthResult), defaultMaxHistory)
		isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfigCopy)
		if localStates.Get().Caches[healthResult.Id].IsAvailable != isAvailable {
			log.Infof("Changing state for %s was: %t now: %t because %s errors: %v", healthResult.Id, prevResult.Available, isAvailable, whyAvailable, healthResult.Errors)
			events.Add(Event{Time: time.Now().Unix(), Description: whyAvailable, Name: healthResult.Id, Hostname: healthResult.Id, Type: toDataCopy.ServerTypes[enum.CacheName(healthResult.Id)].String(), Available: isAvailable})
		}

		localCacheStatus.Set(enum.CacheName(healthResult.Id), CacheAvailableStatus{Available: isAvailable, Status: monitorConfigCopy.TrafficServer[healthResult.Id].Status}) // TODO move within localStates
		localStates.SetCache(healthResult.Id, peer.IsAvailable{IsAvailable: isAvailable})
		log.Debugf("poll %v %v calculateDeliveryServiceState start\n", healthResult.PollID, time.Now())
		calculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates)
		log.Debugf("poll %v %v calculateDeliveryServiceState end\n", healthResult.PollID, time.Now())
	}
	// TODO determine if we should combineCrStates() here

	for _, healthResult := range results {
		if lastHealthStart, ok := lastHealthEndTimes[enum.CacheName(healthResult.Id)]; ok {
			d := time.Since(lastHealthStart)
			lastHealthDurations.Set(enum.CacheName(healthResult.Id), d)
		}
		lastHealthEndTimes[enum.CacheName(healthResult.Id)] = time.Now()

		log.Debugf("poll %v %v finish\n", healthResult.PollID, time.Now())
		healthResult.PollFinished <- healthResult.PollID
	}
}
func createCacheStatuses(cacheTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result, lastHealthDurations map[enum.CacheName]time.Duration, cacheStates map[string]peer.IsAvailable, lastKbpsStats ds.StatsLastKbps, localCacheStatusThreadsafe CacheAvailableStatusThreadsafe) map[enum.CacheName]CacheStatus {
	conns := createCacheConnections(statHistory)
	statii := map[enum.CacheName]CacheStatus{}
	localCacheStatus := localCacheStatusThreadsafe.Get()

	for cacheName, cacheType := range cacheTypes {
		cacheStatHistory, ok := statHistory[cacheName]
		if !ok {
			log.Warnf("createCacheStatuses stat history missing cache %s\n", cacheName)
			continue
		}

		if len(cacheStatHistory) < 1 {
			log.Warnf("createCacheStatuses stat history empty for cache %s\n", cacheName)
			continue
		}

		log.Debugf("createCacheStatuses NOT empty for cache %s\n", cacheName)

		var loadAverage *float64
		procLoadAvg := cacheStatHistory[0].Astats.System.ProcLoadavg
		if procLoadAvg != "" {
			firstSpace := strings.IndexRune(procLoadAvg, ' ')
			if firstSpace == -1 {
				log.Warnf("WARNING unexpected proc.loadavg '%s' for cache %s\n", procLoadAvg, cacheName)
			} else {
				loadAverageVal, err := strconv.ParseFloat(procLoadAvg[:firstSpace], 64)
				if err != nil {
					log.Warnf("proc.loadavg doesn't contain a float prefix '%s' for cache %s\n", procLoadAvg, cacheName)
				} else {
					loadAverage = &loadAverageVal
				}
			}
		}

		var queryTime *int64
		queryTimeVal, ok := lastHealthDurations[cacheName]
		if !ok {
			log.Warnf("cache not in last health durations cache %s\n", cacheName)
		} else {
			queryTimeInt := int64(queryTimeVal / time.Millisecond)
			queryTime = &queryTimeInt
		}

		var kbps *float64
		kbpsVal, ok := lastKbpsStats.Caches[enum.CacheName(cacheName)]
		if !ok {
			log.Warnf("cache not in last kbps cache %s\n", cacheName)
		} else {
			kbps = &kbpsVal.Kbps
		}

		var connections *int64
		connectionsVal, ok := conns[enum.CacheName(cacheName)]
		if !ok {
			log.Warnf("cache not in connections %s\n", cacheName)
		} else {
			connections = &connectionsVal
		}

		var status *string
		statusVal, ok := localCacheStatus[enum.CacheName(cacheName)]
		if !ok {
			log.Warnf("cache not in statuses %s\n", cacheName)
		} else {
			statusString := statusVal.Status + " - "
			if localCacheStatus[enum.CacheName(cacheName)].Available {
				statusString += "available"
			} else {
				statusString += "unavailable"
			}
			status = &statusString
		}

		cacheTypeStr := string(cacheType)
		statii[enum.CacheName(cacheName)] = CacheStatus{Type: &cacheTypeStr, LoadAverage: loadAverage, QueryTimeMilliseconds: queryTime, BandwidthKbps: kbps, ConnectionCount: connections, Status: status}
	}
	return statii
}
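// Illustrative sketch (not from the original source): createCacheStatuses keeps only the first
// whitespace-separated field of proc.loadavg (the 1-minute load average). The same parse in
// isolation, with a hypothetical input shown in the trailing comment:
func parseLoadAvgSketch(procLoadAvg string) (float64, error) {
	firstSpace := strings.IndexRune(procLoadAvg, ' ')
	if firstSpace == -1 {
		return 0, fmt.Errorf("unexpected proc.loadavg '%s'", procLoadAvg)
	}
	return strconv.ParseFloat(procLoadAvg[:firstSpace], 64)
}

// parseLoadAvgSketch("0.30 0.12 0.21 1/863 1421") returns 0.30.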
func addAvailableData(dsStats Stats, crStates peer.Crstates, serverCachegroups map[enum.CacheName]enum.CacheGroupName, serverDs map[enum.CacheName][]enum.DeliveryServiceName, serverTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result) (Stats, error) {
	for cache, available := range crStates.Caches {
		cacheGroup, ok := serverCachegroups[cache]
		if !ok {
			log.Warnf("CreateStats not adding availability data for '%s': not found in Cachegroups\n", cache)
			continue
		}
		deliveryServices, ok := serverDs[cache]
		if !ok {
			log.Warnf("CreateStats not adding availability data for '%s': not found in DeliveryServices\n", cache)
			continue
		}
		cacheType, ok := serverTypes[enum.CacheName(cache)]
		if !ok {
			log.Warnf("CreateStats not adding availability data for '%s': not found in Server Types\n", cache)
			continue
		}

		for _, deliveryService := range deliveryServices {
			if deliveryService == "" {
				log.Errorf("EMPTY addAvailableData DS") // various bugs in other functions can cause this - this will help identify and debug them.
				continue
			}

			stat, ok := dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)]
			if !ok {
				log.Warnf("CreateStats not adding availability data for '%s': not found in Stats\n", cache)
				continue // TODO log warning? Error?
			}

			if available.IsAvailable {
				// c.IsAvailable.Value
				stat.CommonStats.IsAvailable.Value = true
				stat.CommonStats.CachesAvailableNum.Value++
				cacheGroupStats := stat.CacheGroups[enum.CacheGroupName(cacheGroup)]
				cacheGroupStats.IsAvailable.Value = true
				stat.CacheGroups[enum.CacheGroupName(cacheGroup)] = cacheGroupStats
				stat.TotalStats.IsAvailable.Value = true
				typeStats := stat.Types[cacheType]
				typeStats.IsAvailable.Value = true
				stat.Types[cacheType] = typeStats
			}

			// TODO fix nested ifs
			if results, ok := statHistory[enum.CacheName(cache)]; ok {
				if len(results) < 1 {
					log.Warnf("no results %v %v\n", cache, deliveryService)
				} else {
					result := results[0]
					if result.PrecomputedData.Reporting {
						stat.CommonStats.CachesReporting[enum.CacheName(cache)] = true
					} else {
						log.Debugf("no reporting %v %v\n", cache, deliveryService)
					}
				}
			} else {
				log.Debugf("no result for %v %v\n", cache, deliveryService)
			}

			dsStats.DeliveryService[enum.DeliveryServiceName(deliveryService)] = stat // TODO Necessary? Remove?
		}
	}
	return dsStats, nil
}
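// Illustrative sketch (not from the original source): the write-backs above (e.g.
// `stat.CacheGroups[...] = cacheGroupStats` and `dsStats.DeliveryService[...] = stat`) are
// needed because Go map values are not addressable - a field of a struct stored in a map
// cannot be mutated in place. The hypothetical type below shows the read-modify-write pattern
// in isolation:
type availabilitySketch struct{ Available bool }

func markAvailableSketch(m map[string]availabilitySketch, key string) {
	// m[key].Available = true  // does not compile: cannot assign to struct field in map
	v := m[key]        // copy the value out of the map
	v.Available = true // mutate the copy
	m[key] = v         // write the modified copy back
}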
// NewPeerStateFilter takes the HTTP query parameters and creates a PeerStateFilter, filtering according to the query parameters passed.
// Query parameters used are `hc`, `stats`, `wildcard`, `type`, and `peers`. The `stats` param filters caches. The `peers` param filters peer Traffic Monitors. The `type` param filters cache types (edge, mid).
// If `hc` is 0, all history is returned. If `hc` is empty, 1 history is returned.
// If `stats` is empty, all stats are returned.
// If `wildcard` is empty, `stats` is considered exact.
// If `type` is empty, all cache types are returned.
func NewPeerStateFilter(params url.Values, cacheTypes map[enum.CacheName]enum.CacheType) (*PeerStateFilter, error) {
	// TODO change legacy `stats` and `hosts` to `caches` and `monitors` (or `peers`).
	validParams := map[string]struct{}{"hc": struct{}{}, "stats": struct{}{}, "wildcard": struct{}{}, "type": struct{}{}, "peers": struct{}{}}
	if len(params) > len(validParams) {
		return nil, fmt.Errorf("invalid query parameters")
	}
	for param, _ := range params {
		if _, ok := validParams[param]; !ok {
			return nil, fmt.Errorf("invalid query parameter '%v'", param)
		}
	}

	historyCount := 1
	if paramHc, exists := params["hc"]; exists && len(paramHc) > 0 {
		v, err := strconv.Atoi(paramHc[0])
		if err == nil {
			historyCount = v
		}
	}

	cachesToUse := map[enum.CacheName]struct{}{}
	// TODO rename 'stats' to 'caches'
	if paramStats, exists := params["stats"]; exists && len(paramStats) > 0 {
		commaStats := strings.Split(paramStats[0], ",")
		for _, stat := range commaStats {
			cachesToUse[enum.CacheName(stat)] = struct{}{}
		}
	}

	wildcard := false
	if paramWildcard, exists := params["wildcard"]; exists && len(paramWildcard) > 0 {
		wildcard, _ = strconv.ParseBool(paramWildcard[0]) // ignore errors, error => false
	}

	cacheType := enum.CacheTypeInvalid
	if paramType, exists := params["type"]; exists && len(paramType) > 0 {
		cacheType = enum.CacheTypeFromString(paramType[0])
		if cacheType == enum.CacheTypeInvalid {
			return nil, fmt.Errorf("invalid query parameter type '%v' - valid types are: {edge, mid}", paramType[0])
		}
	}

	peersToUse := map[enum.TrafficMonitorName]struct{}{}
	if paramNames, exists := params["peers"]; exists && len(paramNames) > 0 {
		commaNames := strings.Split(paramNames[0], ",")
		for _, name := range commaNames {
			peersToUse[enum.TrafficMonitorName(name)] = struct{}{}
		}
	}
	// parameters without values are considered names, e.g. `?my-cache-0` or `?my-delivery-service`
	for maybeName, val := range params {
		if len(val) == 0 || (len(val) == 1 && val[0] == "") {
			peersToUse[enum.TrafficMonitorName(maybeName)] = struct{}{}
		}
	}

	return &PeerStateFilter{
		historyCount: historyCount,
		cachesToUse:  cachesToUse,
		wildcard:     wildcard,
		cacheType:    cacheType,
		peersToUse:   peersToUse,
		cacheTypes:   cacheTypes,
	}, nil
}
// NewCacheStatFilter takes the HTTP query parameters and creates a CacheStatFilter which fulfills the `cache.Filter` interface, filtering according to the query parameters passed.
// Query parameters used are `hc`, `stats`, `wildcard`, `type`, and `hosts`.
// If `hc` is 0, all history is returned. If `hc` is empty, 1 history is returned.
// If `stats` is empty, all stats are returned.
// If `wildcard` is empty, `stats` is considered exact.
// If `type` is empty, all cache types are returned.
func NewCacheStatFilter(params url.Values, cacheTypes map[enum.CacheName]enum.CacheType) (cache.Filter, error) {
	validParams := map[string]struct{}{"hc": struct{}{}, "stats": struct{}{}, "wildcard": struct{}{}, "type": struct{}{}, "hosts": struct{}{}}
	if len(params) > len(validParams) {
		return nil, fmt.Errorf("invalid query parameters")
	}
	for param, _ := range params {
		if _, ok := validParams[param]; !ok {
			return nil, fmt.Errorf("invalid query parameter '%v'", param)
		}
	}

	historyCount := 1
	if paramHc, exists := params["hc"]; exists && len(paramHc) > 0 {
		v, err := strconv.Atoi(paramHc[0])
		if err == nil {
			historyCount = v
		}
	}

	statsToUse := map[string]struct{}{}
	if paramStats, exists := params["stats"]; exists && len(paramStats) > 0 {
		commaStats := strings.Split(paramStats[0], ",")
		for _, stat := range commaStats {
			statsToUse[stat] = struct{}{}
		}
	}

	wildcard := false
	if paramWildcard, exists := params["wildcard"]; exists && len(paramWildcard) > 0 {
		wildcard, _ = strconv.ParseBool(paramWildcard[0]) // ignore errors, error => false
	}

	cacheType := enum.CacheTypeInvalid
	if paramType, exists := params["type"]; exists && len(paramType) > 0 {
		cacheType = enum.CacheTypeFromString(paramType[0])
		if cacheType == enum.CacheTypeInvalid {
			return nil, fmt.Errorf("invalid query parameter type '%v' - valid types are: {edge, mid}", paramType[0])
		}
	}

	hosts := map[enum.CacheName]struct{}{}
	if paramHosts, exists := params["hosts"]; exists && len(paramHosts) > 0 {
		commaHosts := strings.Split(paramHosts[0], ",")
		for _, host := range commaHosts {
			hosts[enum.CacheName(host)] = struct{}{}
		}
	}
	// parameters without values are considered hosts, e.g. `?my-cache-0`
	for maybeHost, val := range params {
		if len(val) == 0 || (len(val) == 1 && val[0] == "") {
			hosts[enum.CacheName(maybeHost)] = struct{}{}
		}
	}

	return &CacheStatFilter{
		historyCount: historyCount,
		statsToUse:   statsToUse,
		wildcard:     wildcard,
		cacheType:    cacheType,
		hosts:        hosts,
		cacheTypes:   cacheTypes,
	}, nil
}
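// Illustrative usage sketch (not from the original source): NewCacheStatFilter is driven by
// url.Values, typically parsed from an API request's query string. The query string, stat name,
// and cache name below are hypothetical; cacheTypes would normally come from the CRConfig-derived
// server type map (see getServerTypes above).
func cacheStatFilterExampleSketch() (cache.Filter, error) {
	params, err := url.ParseQuery("hc=5&stats=ats.proxy.process.http.current_client_connections&wildcard=true")
	if err != nil {
		return nil, err
	}
	cacheTypes := map[enum.CacheName]enum.CacheType{
		"edge-cache-0": enum.CacheTypeFromString("edge"), // hypothetical cache; "edge" per the valid types documented above
	}
	return NewCacheStatFilter(params, cacheTypes)
}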
// TODO timing, and determine if the case, or its internal `for`, should be put in a goroutine
// TODO determine if subscribers take action on change, and change to mutexed objects if not.
func monitorConfigListen(monitorConfigTS TrafficMonitorConfigMapThreadsafe, monitorConfigPollChan <-chan to.TrafficMonitorConfigMap, localStates peer.CRStatesThreadsafe, statUrlSubscriber chan<- poller.HttpPollerConfig, healthUrlSubscriber chan<- poller.HttpPollerConfig, peerUrlSubscriber chan<- poller.HttpPollerConfig, cfg config.Config, staticAppData StaticAppData) {
	for {
		select {
		case monitorConfig := <-monitorConfigPollChan:
			monitorConfigTS.Set(monitorConfig)
			healthUrls := map[string]string{}
			statUrls := map[string]string{}
			peerUrls := map[string]string{}
			caches := map[string]string{}

			for _, srv := range monitorConfig.TrafficServer {
				caches[srv.HostName] = srv.Status

				cacheName := enum.CacheName(srv.HostName)

				if srv.Status == "ONLINE" {
					localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: true})
					continue
				}
				if srv.Status == "OFFLINE" {
					localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: false})
					continue
				}
				// seed states with available = false until our polling cycle picks up a result
				if _, exists := localStates.Get().Caches[cacheName]; !exists {
					localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: false})
				}

				url := monitorConfig.Profile[srv.Profile].Parameters.HealthPollingURL
				r := strings.NewReplacer(
					"${hostname}", srv.FQDN,
					"${interface_name}", srv.InterfaceName,
					"application=system", "application=plugin.remap",
					"application=", "application=plugin.remap",
				)
				url = r.Replace(url)
				healthUrls[srv.HostName] = url

				r = strings.NewReplacer("application=plugin.remap", "application=")
				url = r.Replace(url)
				statUrls[srv.HostName] = url
			}

			for _, srv := range monitorConfig.TrafficMonitor {
				if srv.HostName == staticAppData.Hostname {
					continue
				}
				if srv.Status != "ONLINE" {
					continue
				}
				// TODO: the URL should be config driven. -jse
				url := fmt.Sprintf("http://%s:%d/publish/CrStates?raw", srv.IP, srv.Port)
				peerUrls[srv.HostName] = url
			}

			statUrlSubscriber <- poller.HttpPollerConfig{Urls: statUrls, Interval: cfg.CacheStatPollingInterval}
			healthUrlSubscriber <- poller.HttpPollerConfig{Urls: healthUrls, Interval: cfg.CacheHealthPollingInterval}
			peerUrlSubscriber <- poller.HttpPollerConfig{Urls: peerUrls, Interval: cfg.PeerPollingInterval}

			for cacheName := range localStates.GetCaches() {
				if _, exists := monitorConfig.TrafficServer[string(cacheName)]; !exists {
					log.Warnf("Removing %s from localStates", cacheName)
					localStates.DeleteCache(cacheName)
				}
			}

			// TODO because there are multiple writers to localStates.DeliveryService, there is a race condition, where MonitorConfig (this func) and HealthResultManager could write at the same time, and the HealthResultManager could overwrite a delivery service addition or deletion here. Probably the simplest and most performant fix would be a lock-free algorithm using atomic compare-and-swaps.
			for _, ds := range monitorConfig.DeliveryService {
				// since caches default to unavailable, also default DS false
				if _, exists := localStates.Get().Deliveryservice[enum.DeliveryServiceName(ds.XMLID)]; !exists {
					localStates.SetDeliveryService(enum.DeliveryServiceName(ds.XMLID), peer.Deliveryservice{IsAvailable: false, DisabledLocations: []enum.CacheName{}}) // important to initialize DisabledLocations, so JSON is `[]` not `null`
				}
			}
			for ds, _ := range localStates.Get().Deliveryservice {
				if _, exists := monitorConfig.DeliveryService[string(ds)]; !exists {
					localStates.DeleteDeliveryService(ds)
				}
			}
		}
	}
}
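// Illustrative sketch (not from the original source): monitorConfigListen builds each cache's
// health URL from the profile's HealthPollingURL template by substituting ${hostname} and
// ${interface_name}, then derives the stat URL with a second rewrite of the application
// parameter. The template string and server values below are hypothetical:
func buildPollUrlsSketch() (healthUrl string, statUrl string) {
	template := "http://${hostname}/_astats?application=system&inf.name=${interface_name}" // hypothetical HealthPollingURL
	fqdn, interfaceName := "edge-cache-0.example.net", "eth0"                              // hypothetical server data

	r := strings.NewReplacer(
		"${hostname}", fqdn,
		"${interface_name}", interfaceName,
		"application=system", "application=plugin.remap",
		"application=", "application=plugin.remap",
	)
	healthUrl = r.Replace(template) // mirrors the healthUrls assignment above

	r = strings.NewReplacer("application=plugin.remap", "application=")
	statUrl = r.Replace(healthUrl) // mirrors the statUrls assignment above
	return healthUrl, statUrl
}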