// processHealthResult processes the given health results, adding their stats to the CacheAvailableStatus. Note this is NOT threadsafe, because it non-atomically gets CacheAvailableStatuses, Events, LastHealthDurations and later updates them. This MUST NOT be called from multiple threads. func processHealthResult(cacheHealthChan <-chan cache.Result, toData todata.TODataThreadsafe, localStates peer.CRStatesThreadsafe, lastHealthDurationsThreadsafe DurationMapThreadsafe, statHistory StatHistoryThreadsafe, monitorConfig TrafficMonitorConfigMapThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, fetchCount UintThreadsafe, errorCount UintThreadsafe, events EventsThreadsafe, localCacheStatusThreadsafe CacheAvailableStatusThreadsafe, lastHealthEndTimes map[enum.CacheName]time.Time, healthHistory map[enum.CacheName][]cache.Result, results []cache.Result, cfg config.Config) { if len(results) == 0 { return } toDataCopy := toData.Get() // create a copy, so the same data used for all processing of this cache health result localCacheStatus := localCacheStatusThreadsafe.Get().Copy() monitorConfigCopy := monitorConfig.Get() for _, healthResult := range results { log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now()) fetchCount.Inc() var prevResult cache.Result healthResultHistory := healthHistory[enum.CacheName(healthResult.Id)] // healthResultHistory := healthHistory.Get(enum.CacheName(healthResult.Id)) if len(healthResultHistory) != 0 { prevResult = healthResultHistory[len(healthResultHistory)-1] } health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy) // healthHistory.Set(enum.CacheName(healthResult.Id), pruneHistory(append(healthHistory.Get(enum.CacheName(healthResult.Id)), healthResult), defaultMaxHistory)) healthHistory[enum.CacheName(healthResult.Id)] = pruneHistory(append(healthHistory[enum.CacheName(healthResult.Id)], healthResult), cfg.MaxHealthHistory) isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfigCopy) if localStates.Get().Caches[healthResult.Id].IsAvailable != isAvailable { log.Infof("Changing state for %s was: %t now: %t because %s errors: %v", healthResult.Id, prevResult.Available, isAvailable, whyAvailable, healthResult.Errors) events.Add(Event{Time: time.Now().Unix(), Description: whyAvailable, Name: healthResult.Id, Hostname: healthResult.Id, Type: toDataCopy.ServerTypes[healthResult.Id].String(), Available: isAvailable}) } localCacheStatus[healthResult.Id] = CacheAvailableStatus{Available: isAvailable, Status: monitorConfigCopy.TrafficServer[string(healthResult.Id)].Status} // TODO move within localStates? localStates.SetCache(healthResult.Id, peer.IsAvailable{IsAvailable: isAvailable}) log.Debugf("poll %v %v calculateDeliveryServiceState start\n", healthResult.PollID, time.Now()) calculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates) log.Debugf("poll %v %v calculateDeliveryServiceState end\n", healthResult.PollID, time.Now()) } localCacheStatusThreadsafe.Set(localCacheStatus) // TODO determine if we should combineCrStates() here lastHealthDurations := lastHealthDurationsThreadsafe.Get().Copy() for _, healthResult := range results { if lastHealthStart, ok := lastHealthEndTimes[enum.CacheName(healthResult.Id)]; ok { d := time.Since(lastHealthStart) lastHealthDurations[enum.CacheName(healthResult.Id)] = d } lastHealthEndTimes[enum.CacheName(healthResult.Id)] = time.Now() log.Debugf("poll %v %v finish\n", healthResult.PollID, time.Now()) healthResult.PollFinished <- healthResult.PollID } lastHealthDurationsThreadsafe.Set(lastHealthDurations) }
// TODO timing, and determine if the case, or its internal `for`, should be put in a goroutine // TODO determine if subscribers take action on change, and change to mutexed objects if not. func monitorConfigListen(monitorConfigTS TrafficMonitorConfigMapThreadsafe, monitorConfigPollChan <-chan to.TrafficMonitorConfigMap, localStates peer.CRStatesThreadsafe, statUrlSubscriber chan<- poller.HttpPollerConfig, healthUrlSubscriber chan<- poller.HttpPollerConfig, peerUrlSubscriber chan<- poller.HttpPollerConfig, cfg config.Config, staticAppData StaticAppData) { for { select { case monitorConfig := <-monitorConfigPollChan: monitorConfigTS.Set(monitorConfig) healthUrls := map[string]string{} statUrls := map[string]string{} peerUrls := map[string]string{} caches := map[string]string{} for _, srv := range monitorConfig.TrafficServer { caches[srv.HostName] = srv.Status cacheName := enum.CacheName(srv.HostName) if srv.Status == "ONLINE" { localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: true}) continue } if srv.Status == "OFFLINE" { localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: false}) continue } // seed states with available = false until our polling cycle picks up a result if _, exists := localStates.Get().Caches[cacheName]; !exists { localStates.SetCache(cacheName, peer.IsAvailable{IsAvailable: false}) } url := monitorConfig.Profile[srv.Profile].Parameters.HealthPollingURL r := strings.NewReplacer( "${hostname}", srv.FQDN, "${interface_name}", srv.InterfaceName, "application=system", "application=plugin.remap", "application=", "application=plugin.remap", ) url = r.Replace(url) healthUrls[srv.HostName] = url r = strings.NewReplacer("application=plugin.remap", "application=") url = r.Replace(url) statUrls[srv.HostName] = url } for _, srv := range monitorConfig.TrafficMonitor { if srv.HostName == staticAppData.Hostname { continue } if srv.Status != "ONLINE" { continue } // TODO: the URL should be config driven. -jse url := fmt.Sprintf("http://%s:%d/publish/CrStates?raw", srv.IP, srv.Port) peerUrls[srv.HostName] = url } statUrlSubscriber <- poller.HttpPollerConfig{Urls: statUrls, Interval: cfg.CacheStatPollingInterval} healthUrlSubscriber <- poller.HttpPollerConfig{Urls: healthUrls, Interval: cfg.CacheHealthPollingInterval} peerUrlSubscriber <- poller.HttpPollerConfig{Urls: peerUrls, Interval: cfg.PeerPollingInterval} for cacheName := range localStates.GetCaches() { if _, exists := monitorConfig.TrafficServer[string(cacheName)]; !exists { log.Warnf("Removing %s from localStates", cacheName) localStates.DeleteCache(cacheName) } } // TODO because there are multiple writers to localStates.DeliveryService, there is a race condition, where MonitorConfig (this func) and HealthResultManager could write at the same time, and the HealthResultManager could overwrite a delivery service addition or deletion here. Probably the simplest and most performant fix would be a lock-free algorithm using atomic compare-and-swaps. for _, ds := range monitorConfig.DeliveryService { // since caches default to unavailable, also default DS false if _, exists := localStates.Get().Deliveryservice[enum.DeliveryServiceName(ds.XMLID)]; !exists { localStates.SetDeliveryService(enum.DeliveryServiceName(ds.XMLID), peer.Deliveryservice{IsAvailable: false, DisabledLocations: []enum.CacheName{}}) // important to initialize DisabledLocations, so JSON is `[]` not `null` } } for ds, _ := range localStates.Get().Deliveryservice { if _, exists := monitorConfig.DeliveryService[string(ds)]; !exists { localStates.DeleteDeliveryService(ds) } } } } }
// TODO timing, and determine if the case, or its internal `for`, should be put in a goroutine // TODO determine if subscribers take action on change, and change to mutexed objects if not. func monitorConfigListen(monitorConfigTS TrafficMonitorConfigMapThreadsafe, monitorConfigPollChan <-chan to.TrafficMonitorConfigMap, localStates peer.CRStatesThreadsafe, statUrlSubscriber chan<- poller.HttpPollerConfig, healthUrlSubscriber chan<- poller.HttpPollerConfig, peerUrlSubscriber chan<- poller.HttpPollerConfig) { for { select { case monitorConfig := <-monitorConfigPollChan: monitorConfigTS.Set(monitorConfig) healthUrls := map[string]string{} statUrls := map[string]string{} peerUrls := map[string]string{} caches := map[string]string{} for _, srv := range monitorConfig.TrafficServer { caches[srv.HostName] = srv.Status if srv.Status == "ONLINE" { localStates.SetCache(srv.HostName, peer.IsAvailable{IsAvailable: true}) continue } if srv.Status == "OFFLINE" { localStates.SetCache(srv.HostName, peer.IsAvailable{IsAvailable: false}) continue } // seed states with available = false until our polling cycle picks up a result if _, exists := localStates.Get().Caches[srv.HostName]; !exists { localStates.SetCache(srv.HostName, peer.IsAvailable{IsAvailable: false}) } url := monitorConfig.Profile[srv.Profile].Parameters.HealthPollingURL r := strings.NewReplacer( "${hostname}", srv.FQDN, "${interface_name}", srv.InterfaceName, "application=system", "application=plugin.remap", "application=", "application=plugin.remap", ) url = r.Replace(url) healthUrls[srv.HostName] = url r = strings.NewReplacer("application=plugin.remap", "application=") url = r.Replace(url) statUrls[srv.HostName] = url } for _, srv := range monitorConfig.TrafficMonitor { if srv.Status != "ONLINE" { continue } // TODO: the URL should be config driven. -jse url := fmt.Sprintf("http://%s:%d/publish/CrStates?raw", srv.IP, srv.Port) peerUrls[srv.HostName] = url } statUrlSubscriber <- poller.HttpPollerConfig{Urls: statUrls, Interval: defaultCacheStatPollingInterval} healthUrlSubscriber <- poller.HttpPollerConfig{Urls: healthUrls, Interval: defaultCacheHealthPollingInterval} peerUrlSubscriber <- poller.HttpPollerConfig{Urls: peerUrls, Interval: defaultPeerPollingInterval} for k := range localStates.GetCaches() { if _, exists := monitorConfig.TrafficServer[k]; !exists { log.Warnf("Removing %s from localStates", k) localStates.DeleteCache(k) } } addStateDeliveryServices(monitorConfig, localStates.Get().Deliveryservice) } } }