Пример #1
0
// StartStatHistoryManager fetches the full statistics data from ATS Astats. This includes everything needed for all calculations, such as Delivery Services. This is expensive, though, and may be hard on ATS, so it should poll less often.
// For a fast 'is it alive' poll, use the Health Result Manager poll.
// Returns the stat history, the duration between the stat poll for each cache, the last Kbps data, and the calculated Delivery Service stats.
func StartStatHistoryManager(cacheStatChan <-chan cache.Result, combinedStates peer.CRStatesThreadsafe, toData todata.TODataThreadsafe, errorCount UintThreadsafe, cfg config.Config) (StatHistoryThreadsafe, DurationMapThreadsafe, LastStatsThreadsafe, DSStatsThreadsafe) {
	statHistory := NewStatHistoryThreadsafe(cfg.MaxStatHistory)
	lastStatDurations := NewDurationMapThreadsafe()
	lastStatEndTimes := map[enum.CacheName]time.Time{}
	lastStats := NewLastStatsThreadsafe()
	dsStats := NewDSStatsThreadsafe()
	tickInterval := cfg.StatFlushInterval
	go func() {
		for {
			var results []cache.Result
			results = append(results, <-cacheStatChan)
			tick := time.Tick(tickInterval)
		innerLoop:
			for {
				select {
				case <-tick:
					log.Warnf("StatHistoryManager flushing queued results\n")
					processStatResults(results, statHistory, combinedStates.Get(), lastStats, toData.Get(), errorCount, dsStats, lastStatEndTimes, lastStatDurations)
					break innerLoop
				default:
					select {
					case r := <-cacheStatChan:
						results = append(results, r)
					default:
						processStatResults(results, statHistory, combinedStates.Get(), lastStats, toData.Get(), errorCount, dsStats, lastStatEndTimes, lastStatDurations)
						break innerLoop
					}
				}
			}
		}
	}()
	return statHistory, lastStatDurations, lastStats, dsStats
}
Пример #2
0
// StartStatHistoryManager fetches the full statistics data from ATS Astats. This includes everything needed for all calculations, such as Delivery Services. This is expensive, though, and may be hard on ATS, so it should poll less often.
// For a fast 'is it alive' poll, use the Health Result Manager poll.
// Returns the stat history, the duration between the stat poll for each cache, the last Kbps data, the calculated Delivery Service stats, and the unpolled caches list.
func StartStatHistoryManager(
	cacheStatChan <-chan cache.Result,
	localStates peer.CRStatesThreadsafe,
	combinedStates peer.CRStatesThreadsafe,
	toData todata.TODataThreadsafe,
	cachesChanged <-chan struct{},
	errorCount UintThreadsafe,
	cfg config.Config,
	monitorConfig TrafficMonitorConfigMapThreadsafe,
) (StatHistoryThreadsafe, DurationMapThreadsafe, LastStatsThreadsafe, DSStatsThreadsafe, UnpolledCachesThreadsafe) {
	statHistory := NewStatHistoryThreadsafe(cfg.MaxStatHistory)
	lastStatDurations := NewDurationMapThreadsafe()
	lastStatEndTimes := map[enum.CacheName]time.Time{}
	lastStats := NewLastStatsThreadsafe()
	dsStats := NewDSStatsThreadsafe()
	unpolledCaches := NewUnpolledCachesThreadsafe()
	tickInterval := cfg.StatFlushInterval
	go func() {

		<-cachesChanged // wait for the signal that localStates have been set
		unpolledCaches.SetNewCaches(getNewCaches(localStates, monitorConfig))

		for {
			var results []cache.Result
			results = append(results, <-cacheStatChan)
			tick := time.Tick(tickInterval)
		innerLoop:
			for {
				select {
				case <-cachesChanged:
					unpolledCaches.SetNewCaches(getNewCaches(localStates, monitorConfig))
				case <-tick:
					log.Warnf("StatHistoryManager flushing queued results\n")
					processStatResults(results, statHistory, combinedStates.Get(), lastStats, toData.Get(), errorCount, dsStats, lastStatEndTimes, lastStatDurations, unpolledCaches)
					break innerLoop
				default:
					select {
					case r := <-cacheStatChan:
						results = append(results, r)
					default:
						processStatResults(results, statHistory, combinedStates.Get(), lastStats, toData.Get(), errorCount, dsStats, lastStatEndTimes, lastStatDurations, unpolledCaches)
						break innerLoop
					}
				}
			}
		}
	}()
	return statHistory, lastStatDurations, lastStats, dsStats, unpolledCaches
}
// processHealthResult processes the given health results, adding their stats to the CacheAvailableStatus. Note this is NOT threadsafe, because it non-atomically gets CacheAvailableStatuses, Events, LastHealthDurations and later updates them. This MUST NOT be called from multiple threads.
func processHealthResult(cacheHealthChan <-chan cache.Result, toData todata.TODataThreadsafe, localStates peer.CRStatesThreadsafe, lastHealthDurationsThreadsafe DurationMapThreadsafe, statHistory StatHistoryThreadsafe, monitorConfig TrafficMonitorConfigMapThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, fetchCount UintThreadsafe, errorCount UintThreadsafe, events EventsThreadsafe, localCacheStatusThreadsafe CacheAvailableStatusThreadsafe, lastHealthEndTimes map[enum.CacheName]time.Time, healthHistory map[enum.CacheName][]cache.Result, results []cache.Result, cfg config.Config) {
	if len(results) == 0 {
		return
	}
	toDataCopy := toData.Get() // create a copy, so the same data used for all processing of this cache health result
	localCacheStatus := localCacheStatusThreadsafe.Get().Copy()
	monitorConfigCopy := monitorConfig.Get()
	for _, healthResult := range results {
		log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now())
		fetchCount.Inc()
		var prevResult cache.Result
		healthResultHistory := healthHistory[enum.CacheName(healthResult.Id)]
		// healthResultHistory := healthHistory.Get(enum.CacheName(healthResult.Id))
		if len(healthResultHistory) != 0 {
			prevResult = healthResultHistory[len(healthResultHistory)-1]
		}

		health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy)
		// healthHistory.Set(enum.CacheName(healthResult.Id), pruneHistory(append(healthHistory.Get(enum.CacheName(healthResult.Id)), healthResult), defaultMaxHistory))
		healthHistory[enum.CacheName(healthResult.Id)] = pruneHistory(append(healthHistory[enum.CacheName(healthResult.Id)], healthResult), cfg.MaxHealthHistory)
		isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfigCopy)
		if localStates.Get().Caches[healthResult.Id].IsAvailable != isAvailable {
			log.Infof("Changing state for %s was: %t now: %t because %s errors: %v", healthResult.Id, prevResult.Available, isAvailable, whyAvailable, healthResult.Errors)
			events.Add(Event{Time: time.Now().Unix(), Description: whyAvailable, Name: healthResult.Id, Hostname: healthResult.Id, Type: toDataCopy.ServerTypes[healthResult.Id].String(), Available: isAvailable})
		}

		localCacheStatus[healthResult.Id] = CacheAvailableStatus{Available: isAvailable, Status: monitorConfigCopy.TrafficServer[string(healthResult.Id)].Status} // TODO move within localStates?
		localStates.SetCache(healthResult.Id, peer.IsAvailable{IsAvailable: isAvailable})
		log.Debugf("poll %v %v calculateDeliveryServiceState start\n", healthResult.PollID, time.Now())
		calculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates)
		log.Debugf("poll %v %v calculateDeliveryServiceState end\n", healthResult.PollID, time.Now())
	}
	localCacheStatusThreadsafe.Set(localCacheStatus)
	// TODO determine if we should combineCrStates() here

	lastHealthDurations := lastHealthDurationsThreadsafe.Get().Copy()
	for _, healthResult := range results {
		if lastHealthStart, ok := lastHealthEndTimes[enum.CacheName(healthResult.Id)]; ok {
			d := time.Since(lastHealthStart)
			lastHealthDurations[enum.CacheName(healthResult.Id)] = d
		}
		lastHealthEndTimes[enum.CacheName(healthResult.Id)] = time.Now()

		log.Debugf("poll %v %v finish\n", healthResult.PollID, time.Now())
		healthResult.PollFinished <- healthResult.PollID
	}
	lastHealthDurationsThreadsafe.Set(lastHealthDurations)
}
Пример #4
0
func dataRequestManagerListen(dr <-chan http_server.DataRequest, opsConfig OpsConfigThreadsafe, toSession towrap.ITrafficOpsSession, localStates peer.CRStatesThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, statHistory StatHistoryThreadsafe, dsStats DSStatsThreadsafe, events EventsThreadsafe, staticAppData StaticAppData, healthPollInterval time.Duration, lastHealthDurations DurationMapThreadsafe, fetchCount UintThreadsafe, healthIteration UintThreadsafe, errorCount UintThreadsafe, toData todata.TODataThreadsafe, localCacheStatus CacheAvailableStatusThreadsafe, lastKbpsStats StatsLastKbpsThreadsafe) {
	for {
		select {
		case req := <-dr:
			defer close(req.Response)

			var body []byte
			var err error

			switch req.Type {
			case http_server.TRConfig:
				cdnName := opsConfig.Get().CdnName
				if toSession == nil {
					err = fmt.Errorf("Unable to connect to Traffic Ops")
				} else if cdnName == "" {
					err = fmt.Errorf("No CDN Configured")
				} else {
					body, err = toSession.CRConfigRaw(cdnName)
				}
				if err != nil {
					err = fmt.Errorf("TR Config: %v", err)
				}
			case http_server.TRStateDerived:
				body, err = peer.CrstatesMarshall(combinedStates.Get())
				if err != nil {
					err = fmt.Errorf("TR State (derived): %v", err)
				}
			case http_server.TRStateSelf:
				body, err = peer.CrstatesMarshall(localStates.Get())
				if err != nil {
					err = fmt.Errorf("TR State (self): %v", err)
				}
			case http_server.CacheStats:
				// TODO: add support for ?hc=N query param, stats=, wildcard, individual caches
				// add pp and date to the json:
				/*
					pp: "0=[my-ats-edge-cache-1], hc=[1]",
					date: "Thu Oct 09 20:28:36 UTC 2014"
				*/
				params := req.Parameters
				hc := 1
				if _, exists := params["hc"]; exists {
					v, err := strconv.Atoi(params["hc"][0])
					if err == nil {
						hc = v
					}
				}
				body, err = cache.StatsMarshall(statHistory.Get(), hc)
				if err != nil {
					err = fmt.Errorf("CacheStats: %v", err)
				}
			case http_server.DSStats:
				body, err = json.Marshal(ds.StatsJSON(dsStats.Get())) // TODO marshall beforehand, for performance? (test to see how often requests are made)
				if err != nil {
					err = fmt.Errorf("DsStats: %v", err)
				}
			case http_server.EventLog:
				body, err = json.Marshal(JSONEvents{Events: events.Get()})
				if err != nil {
					err = fmt.Errorf("EventLog: %v", err)
				}
			case http_server.PeerStates:
				body, err = json.Marshal(createApiPeerStates(peerStates.Get()))
			case http_server.StatSummary:
				body = []byte("TODO implement")
			case http_server.Stats:
				body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get())
				if err != nil {
					err = fmt.Errorf("Stats: %v", err)
				}
			case http_server.ConfigDoc:
				opsConfigCopy := opsConfig.Get()
				// if the password is blank, leave it blank, so callers can see it's missing.
				if opsConfigCopy.Password != "" {
					opsConfigCopy.Password = "******"
				}
				body, err = json.Marshal(opsConfigCopy)
				if err != nil {
					err = fmt.Errorf("Config Doc: %v", err)
				}
			case http_server.APICacheCount: // TODO determine if this should use peerStates
				body = []byte(strconv.Itoa(len(localStates.Get().Caches)))
			case http_server.APICacheAvailableCount:
				body = []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches)))
			case http_server.APICacheDownCount:
				body = []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches)))
			case http_server.APIVersion:
				s := "traffic_monitor-" + staticAppData.Version + "."
				if len(staticAppData.GitRevision) > 6 {
					s += staticAppData.GitRevision[:6]
				} else {
					s += staticAppData.GitRevision
				}
				body = []byte(s)
			case http_server.APITrafficOpsURI:
				body = []byte(opsConfig.Get().Url)
			case http_server.APICacheStates:
				body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(), lastHealthDurations.Get(), localStates.Get().Caches, lastKbpsStats.Get(), localCacheStatus))
			case http_server.APIBandwidthKbps:
				serverTypes := toData.Get().ServerTypes
				kbpsStats := lastKbpsStats.Get()
				sum := float64(0.0)
				for cache, data := range kbpsStats.Caches {
					if serverTypes[cache] != enum.CacheTypeEdge {
						continue
					}
					sum += data.Kbps
				}
				body = []byte(fmt.Sprintf("%f", sum))
			default:
				err = fmt.Errorf("Unknown Request Type: %v", req.Type)
			}

			if err != nil {
				errorCount.Inc()
				log.Errorf("Request Error: %v\n", err)
			} else {
				req.Response <- body
			}
		}
	}
}
Пример #5
0
// DataRequest takes an `http_server.DataRequest`, and the monitored data objects, and returns the appropriate response, and the status code.
func DataRequest(
	req http_server.DataRequest,
	opsConfig OpsConfigThreadsafe,
	toSession towrap.ITrafficOpsSession,
	localStates peer.CRStatesThreadsafe,
	peerStates peer.CRStatesPeersThreadsafe,
	combinedStates peer.CRStatesThreadsafe,
	statHistory StatHistoryThreadsafe,
	dsStats DSStatsThreadsafe,
	events EventsThreadsafe,
	staticAppData StaticAppData,
	healthPollInterval time.Duration,
	lastHealthDurations DurationMapThreadsafe,
	fetchCount UintThreadsafe,
	healthIteration UintThreadsafe,
	errorCount UintThreadsafe,
	toData todata.TODataThreadsafe,
	localCacheStatus CacheAvailableStatusThreadsafe,
	lastStats LastStatsThreadsafe,
	unpolledCaches UnpolledCachesThreadsafe,
) (body []byte, responseCode int) {

	// handleErr takes an error, and the request type it came from, and logs. It is ok to call with a nil error, in which case this is a no-op.
	handleErr := func(err error) {
		if err == nil {
			return
		}
		errorCount.Inc()
		log.Errorf("Request Error: %v\n", fmt.Errorf(req.Type.String()+": %v", err))
	}

	// commonReturn takes the body, err, and the data request Type which has been processed. It logs and deals with any error, and returns the appropriate bytes and response code for the `http_server`.
	commonReturn := func(body []byte, err error) ([]byte, int) {
		if err == nil {
			return body, http.StatusOK
		}
		handleErr(err)
		return nil, http.StatusInternalServerError
	}

	if unpolledCaches.Any() {
		handleErr(fmt.Errorf("service still starting, some caches unpolled"))
		return []byte("Service Unavailable"), http.StatusServiceUnavailable
	}

	var err error
	switch req.Type {
	case http_server.TRConfig:
		cdnName := opsConfig.Get().CdnName
		if toSession == nil {
			return commonReturn(nil, fmt.Errorf("Unable to connect to Traffic Ops"))
		}
		if cdnName == "" {
			return commonReturn(nil, fmt.Errorf("No CDN Configured"))
		}
		return commonReturn(body, err)
	case http_server.TRStateDerived:
		body, err = peer.CrstatesMarshall(combinedStates.Get())
		return commonReturn(body, err)
	case http_server.TRStateSelf:
		body, err = peer.CrstatesMarshall(localStates.Get())
		return commonReturn(body, err)
	case http_server.CacheStats:
		filter, err := NewCacheStatFilter(req.Parameters, toData.Get().ServerTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}
		body, err = cache.StatsMarshall(statHistory.Get(), filter, req.Parameters)
		return commonReturn(body, err)
	case http_server.DSStats:
		filter, err := NewDSStatFilter(req.Parameters, toData.Get().DeliveryServiceTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}
		body, err = json.Marshal(dsStats.Get().JSON(filter, req.Parameters)) // TODO marshall beforehand, for performance? (test to see how often requests are made)
		return commonReturn(body, err)
	case http_server.EventLog:
		body, err = json.Marshal(JSONEvents{Events: events.Get()})
		return commonReturn(body, err)
	case http_server.PeerStates:
		filter, err := NewPeerStateFilter(req.Parameters, toData.Get().ServerTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}

		body, err = json.Marshal(createApiPeerStates(peerStates.Get(), filter, req.Parameters))
		return commonReturn(body, err)
	case http_server.StatSummary:
		return nil, http.StatusNotImplemented
	case http_server.Stats:
		body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get())
		return commonReturn(body, err)
	case http_server.ConfigDoc:
		opsConfigCopy := opsConfig.Get()
		// if the password is blank, leave it blank, so callers can see it's missing.
		if opsConfigCopy.Password != "" {
			opsConfigCopy.Password = "******"
		}
		body, err = json.Marshal(opsConfigCopy)
		return commonReturn(body, err)
	case http_server.APICacheCount: // TODO determine if this should use peerStates
		return []byte(strconv.Itoa(len(localStates.Get().Caches))), http.StatusOK
	case http_server.APICacheAvailableCount:
		return []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches))), http.StatusOK
	case http_server.APICacheDownCount:
		return []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches))), http.StatusOK
	case http_server.APIVersion:
		s := "traffic_monitor-" + staticAppData.Version + "."
		if len(staticAppData.GitRevision) > 6 {
			s += staticAppData.GitRevision[:6]
		} else {
			s += staticAppData.GitRevision
		}
		return []byte(s), http.StatusOK
	case http_server.APITrafficOpsURI:
		return []byte(opsConfig.Get().Url), http.StatusOK
	case http_server.APICacheStates:
		body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(),
			lastHealthDurations.Get(), localStates.Get().Caches, lastStats.Get(), localCacheStatus))
		return commonReturn(body, err)
	case http_server.APIBandwidthKbps:
		serverTypes := toData.Get().ServerTypes
		kbpsStats := lastStats.Get()
		sum := float64(0.0)
		for cache, data := range kbpsStats.Caches {
			if serverTypes[cache] != enum.CacheTypeEdge {
				continue
			}
			sum += data.Bytes.PerSec / ds.BytesPerKilobit
		}
		return []byte(fmt.Sprintf("%f", sum)), http.StatusOK
	case http_server.APIBandwidthCapacityKbps:
		statHistory := statHistory.Get()
		cap := int64(0)
		for _, results := range statHistory {
			if len(results) == 0 {
				continue
			}
			cap += results[0].MaxKbps
		}
		return []byte(fmt.Sprintf("%d", cap)), http.StatusOK
	default:
		return commonReturn(nil, fmt.Errorf("Unknown Request Type"))
	}
}