Beispiel #1
0
// Handle decodes the JSON stats read from r into a Result for the cache
// identified by id, and sends that Result on handler.ResultChannel.
//
// Exactly one Result is sent per call:
//   - if err is non-nil, the Result carries err and Available stays false;
//   - if r is nil, or the JSON cannot be decoded, the Result carries the
//     corresponding error and Available stays false;
//   - otherwise Available is set to true and, if handler.Precompute() reports
//     true, the Result is passed through handler.precompute before sending.
//
// pollId and pollFinished are copied into the Result unchanged.
func (handler Handler) Handle(id string, r io.Reader, err error, pollId uint64, pollFinished chan<- uint64) {
	log.Debugf("poll %v %v handle start\n", pollId, time.Now())
	result := Result{
		Id:           id,
		Available:    false,
		Errors:       []error{},
		Time:         time.Now(), // TODO change this to be computed the instant we get the result back, to minimise inaccuracy
		PollID:       pollId,
		PollFinished: pollFinished,
	}

	if err != nil {
		log.Errorf("%v handler given error '%v'\n", id, err) // error here, in case the thing that called Handle didn't error
		result.Errors = append(result.Errors, err)
		handler.ResultChannel <- result
		return
	}

	if r == nil {
		log.Errorf("%v handle reader nil\n", id)
		result.Errors = append(result.Errors, fmt.Errorf("handler got nil reader"))
		handler.ResultChannel <- result
		return
	}

	// Reporting is set before decoding, so it stays true even when the
	// decode below fails.
	result.PrecomputedData.Reporting = true

	if decodeErr := json.NewDecoder(r).Decode(&result.Astats); decodeErr != nil {
		log.Errorf("%s procnetdev decode error '%v'\n", id, decodeErr)
		result.Errors = append(result.Errors, decodeErr)
		handler.ResultChannel <- result
		return
	}

	if result.Astats.System.ProcNetDev == "" {
		log.Warnf("addkbps %s procnetdev empty\n", id)
	}

	log.Debugf("poll %v %v handle decode end\n", pollId, time.Now())

	// Every failure path above has already returned, so reaching this point
	// means the stats were fetched and decoded successfully.
	result.Available = true

	if handler.Precompute() {
		log.Debugf("poll %v %v handle precompute start\n", pollId, time.Now())
		result = handler.precompute(result)
		log.Debugf("poll %v %v handle precompute end\n", pollId, time.Now())
	}
	log.Debugf("poll %v %v handle write start\n", pollId, time.Now())
	handler.ResultChannel <- result
	log.Debugf("poll %v %v handle end\n", pollId, time.Now())
}
Beispiel #2
0
// CreateStats aggregates per-cache precomputed stats into per-delivery-service
// Stats.
//
// It seeds an entry for every delivery service named in
// toData.DeliveryServiceServers, applies setStaticData and addAvailableData,
// then sums the last history entry of every cache in statHistory into the
// matching delivery service. Finally addKbps is applied, and its three return
// values are returned as-is.
//
// If addAvailableData fails, the stats built so far and the unmodified
// lastKbpsStats are returned together with a wrapped error.
func CreateStats(statHistory map[enum.CacheName][]cache.Result, toData todata.TOData, crStates peer.Crstates, lastKbpsStats StatsLastKbps, now time.Time) (Stats, StatsLastKbps, error) {
	begin := time.Now()

	allStats := NewStats()
	for dsName := range toData.DeliveryServiceServers {
		if dsName == "" {
			log.Errorf("EMPTY CreateStats deliveryService")
			continue
		}
		allStats.DeliveryService[enum.DeliveryServiceName(dsName)] = *dsdata.NewStat()
	}
	allStats = setStaticData(allStats, toData.DeliveryServiceServers)

	// TODO move after stat summarisation
	allStats, err := addAvailableData(allStats, crStates, toData.ServerCachegroups, toData.ServerDeliveryServices, toData.ServerTypes, statHistory)
	if err != nil {
		return allStats, lastKbpsStats, fmt.Errorf("Error getting Cache availability data: %v", err)
	}

	for cacheName, results := range statHistory {
		if len(results) == 0 {
			continue // TODO warn?
		}
		cacheGroup, known := toData.ServerCachegroups[cacheName]
		if !known {
			log.Warnf("server %s has no cachegroup, skipping\n", cacheName)
			continue
		}
		cacheType, known := toData.ServerTypes[enum.CacheName(cacheName)]
		if !known {
			log.Warnf("server %s not in CRConfig, skipping\n", cacheName)
			continue
		}

		// Only the last entry of the cache's history is aggregated.
		latest := results[len(results)-1]

		// TODO check latest.PrecomputedData.Errors
		for ds, resultStat := range latest.PrecomputedData.DeliveryServiceStats {
			if ds == "" {
				log.Errorf("EMPTY precomputed delivery service")
				continue
			}

			merged, exists := allStats.DeliveryService[ds]
			if !exists {
				// First time this delivery service is seen: adopt the cache's stat as-is.
				allStats.DeliveryService[ds] = resultStat
				continue
			}

			merged.Total = merged.Total.Sum(resultStat.Total)
			merged.CacheGroups[cacheGroup] = merged.CacheGroups[cacheGroup].Sum(resultStat.CacheGroups[cacheGroup])
			merged.Type[cacheType] = merged.Type[cacheType].Sum(resultStat.Type[cacheType])
			allStats.DeliveryService[ds] = merged // TODO determine if necessary
		}
	}

	kbpsStats, kbpsStatsLastKbps, kbpsErr := addKbps(statHistory, allStats, lastKbpsStats, now)
	log.Infof("CreateStats took %v\n", time.Since(begin))
	return kbpsStats, kbpsStatsLastKbps, kbpsErr
}
Beispiel #3
0
// opsConfigManagerListen loops forever, handling each OpsConfig received on
// opsConfigChannel. For every new config it:
//
//  1. stores the config in opsConfig,
//  2. runs the HTTP server on the configured listen address (":80" when
//     HttpListener is empty), serving requests via dr,
//  3. logs in to Traffic Ops and wraps the session with
//     towrap.NewTrafficOpsSessionThreadsafe,
//  4. fetches the Traffic Ops data for the configured CDN into toData,
//  5. notifies every subscriber of the new config and the new session.
//
// A failure in steps 2-4 is logged, counted, and aborts handling of that
// config; the loop then waits for the next one.
//
// toSession is reassigned locally on each config; because it is a parameter,
// the caller's variable is not changed.
//
// TODO remove change subscribers, give Threadsafes directly to the things that need them. If they only set vars, and don't actually do work on change.
func opsConfigManagerListen(opsConfig OpsConfigThreadsafe, opsConfigChannel <-chan handler.OpsConfig, dr chan<- http_server.DataRequest, toSession towrap.ITrafficOpsSession, toData todata.TODataThreadsafe, opsConfigChangeSubscribers []chan<- handler.OpsConfig, toChangeSubscribers []chan<- towrap.ITrafficOpsSession) {
	httpServer := http_server.Server{}

	errorCount := 0 // TODO make threadsafe and a pointer to errorcount in the main manager?
	for {
		select {
		case newOpsConfig := <-opsConfigChannel:
			var err error
			opsConfig.Set(newOpsConfig)

			listenAddress := ":80" // default

			if newOpsConfig.HttpListener != "" {
				listenAddress = newOpsConfig.HttpListener
			}

			handleErr := func(err error) {
				errorCount++
				log.Errorf("OpsConfigManager: %v\n", err)
			}

			err = httpServer.Run(dr, listenAddress)
			if err != nil {
				handleErr(fmt.Errorf("MonitorConfigPoller: error creating HTTP server: %s\n", err))
				continue
			}

			realToSession, err := to.Login(newOpsConfig.Url, newOpsConfig.Username, newOpsConfig.Password, newOpsConfig.Insecure)
			if err != nil {
				handleErr(fmt.Errorf("MonitorConfigPoller: error instantiating Session with traffic_ops: %s\n", err))
				continue
			}
			toSession = towrap.NewTrafficOpsSessionThreadsafe(realToSession)

			if err := toData.Fetch(toSession, newOpsConfig.CdnName); err != nil {
				handleErr(fmt.Errorf("Error getting Traffic Ops data: %v\n", err))
				continue
			}

			// These must be in a goroutine, because the monitorConfigPoller tick sends to a channel this select listens for. Thus, if we block on sends to the monitorConfigPoller, we have a livelock race condition.
			// More generically, we're using goroutines as an infinite chan buffer, to avoid potential livelocks.
			//
			// The channel and the value are passed as arguments rather than
			// captured: before Go 1.22 the range variable is shared by all
			// iterations, and toSession is reassigned when the next config
			// arrives, so a captured reference could observe a later value.
			for _, subscriber := range opsConfigChangeSubscribers {
				go func(sub chan<- handler.OpsConfig, cfg handler.OpsConfig) {
					sub <- cfg // this is needed for cdnName
				}(subscriber, newOpsConfig)
			}
			for _, subscriber := range toChangeSubscribers {
				go func(sub chan<- towrap.ITrafficOpsSession, session towrap.ITrafficOpsSession) {
					sub <- session
				}(subscriber, toSession)
			}
		}
	}
}
Beispiel #4
0
// precompute performs the calculations that need nothing beyond this single
// cache result, and returns the result with PrecomputedData filled in:
//
//   - OutBytes comes from outBytes applied to the result's ProcNetDev and
//     InfName; on error it is set to 0 and the error is logged.
//   - DeliveryServiceStats is built by feeding every entry of
//     result.Astats.Ats through processStat. Errors other than
//     dsdata.ErrNotProcessedStat are logged and appended to
//     PrecomputedData.Errors.
func (handler Handler) precompute(result Result) Result {
	toData := handler.ToData.Get()
	system := result.Astats.System

	sent, err := outBytes(system.ProcNetDev, system.InfName)
	if err != nil {
		sent = 0
		log.Errorf("addkbps %s handle precomputing outbytes '%v'\n", result.Id, err)
	}
	result.PrecomputedData.OutBytes = sent

	dsStats := map[enum.DeliveryServiceName]dsdata.Stat{}
	for name, val := range result.Astats.Ats {
		var statErr error
		dsStats, statErr = processStat(result.Id, dsStats, toData, name, val)
		if statErr == nil || statErr == dsdata.ErrNotProcessedStat {
			continue
		}
		log.Errorf("precomputing cache %v stat %v value %v error %v", result.Id, name, val, statErr)
		result.PrecomputedData.Errors = append(result.PrecomputedData.Errors, statErr)
	}
	result.PrecomputedData.DeliveryServiceStats = dsStats

	return result
}
// dataRequestManagerListen serves data requests received on dr until dr is
// closed.
//
// For each request it builds a response body according to req.Type, reading
// from the given thread-safe state holders. On success the body is sent on
// req.Response. On failure the error is logged, errorCount is incremented,
// and nothing is sent. In both cases req.Response is closed once the request
// has been handled, so a requester never blocks forever waiting for a reply.
func dataRequestManagerListen(dr <-chan http_server.DataRequest, opsConfig OpsConfigThreadsafe, toSession towrap.ITrafficOpsSession, localStates peer.CRStatesThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, statHistory StatHistoryThreadsafe, dsStats DSStatsThreadsafe, events EventsThreadsafe, staticAppData StaticAppData, healthPollInterval time.Duration, lastHealthDurations DurationMapThreadsafe, fetchCount UintThreadsafe, healthIteration UintThreadsafe, errorCount UintThreadsafe, toData todata.TODataThreadsafe, localCacheStatus CacheAvailableStatusThreadsafe, lastKbpsStats StatsLastKbpsThreadsafe) {
	// Ranging over dr stops the loop when dr is closed, instead of spinning
	// on zero-value requests.
	for req := range dr {
		var body []byte
		var err error

		switch req.Type {
		case http_server.TRConfig:
			cdnName := opsConfig.Get().CdnName
			if toSession == nil {
				err = fmt.Errorf("Unable to connect to Traffic Ops")
			} else if cdnName == "" {
				err = fmt.Errorf("No CDN Configured")
			} else {
				body, err = toSession.CRConfigRaw(cdnName)
			}
			if err != nil {
				err = fmt.Errorf("TR Config: %v", err)
			}
		case http_server.TRStateDerived:
			body, err = peer.CrstatesMarshall(combinedStates.Get())
			if err != nil {
				err = fmt.Errorf("TR State (derived): %v", err)
			}
		case http_server.TRStateSelf:
			body, err = peer.CrstatesMarshall(localStates.Get())
			if err != nil {
				err = fmt.Errorf("TR State (self): %v", err)
			}
		case http_server.CacheStats:
			// TODO: add support for ?hc=N query param, stats=, wildcard, individual caches
			// add pp and date to the json:
			/*
				pp: "0=[my-ats-edge-cache-1], hc=[1]",
				date: "Thu Oct 09 20:28:36 UTC 2014"
			*/
			hc := 1
			// Guard against a present-but-empty "hc" parameter, which would
			// otherwise panic on the [0] index. An unparsable value keeps
			// the default of 1.
			if values, exists := req.Parameters["hc"]; exists && len(values) > 0 {
				if v, convErr := strconv.Atoi(values[0]); convErr == nil {
					hc = v
				}
			}
			body, err = cache.StatsMarshall(statHistory.Get(), hc)
			if err != nil {
				err = fmt.Errorf("CacheStats: %v", err)
			}
		case http_server.DSStats:
			body, err = json.Marshal(ds.StatsJSON(dsStats.Get())) // TODO marshall beforehand, for performance? (test to see how often requests are made)
			if err != nil {
				err = fmt.Errorf("DsStats: %v", err)
			}
		case http_server.EventLog:
			body, err = json.Marshal(JSONEvents{Events: events.Get()})
			if err != nil {
				err = fmt.Errorf("EventLog: %v", err)
			}
		case http_server.PeerStates:
			body, err = json.Marshal(createApiPeerStates(peerStates.Get()))
		case http_server.StatSummary:
			body = []byte("TODO implement")
		case http_server.Stats:
			body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get())
			if err != nil {
				err = fmt.Errorf("Stats: %v", err)
			}
		case http_server.ConfigDoc:
			opsConfigCopy := opsConfig.Get()
			// if the password is blank, leave it blank, so callers can see it's missing.
			if opsConfigCopy.Password != "" {
				opsConfigCopy.Password = "******"
			}
			body, err = json.Marshal(opsConfigCopy)
			if err != nil {
				err = fmt.Errorf("Config Doc: %v", err)
			}
		case http_server.APICacheCount: // TODO determine if this should use peerStates
			body = []byte(strconv.Itoa(len(localStates.Get().Caches)))
		case http_server.APICacheAvailableCount:
			body = []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches)))
		case http_server.APICacheDownCount:
			body = []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches)))
		case http_server.APIVersion:
			// Version string plus at most the first 6 characters of the git revision.
			s := "traffic_monitor-" + staticAppData.Version + "."
			if len(staticAppData.GitRevision) > 6 {
				s += staticAppData.GitRevision[:6]
			} else {
				s += staticAppData.GitRevision
			}
			body = []byte(s)
		case http_server.APITrafficOpsURI:
			body = []byte(opsConfig.Get().Url)
		case http_server.APICacheStates:
			body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(), lastHealthDurations.Get(), localStates.Get().Caches, lastKbpsStats.Get(), localCacheStatus))
		case http_server.APIBandwidthKbps:
			// Sum the last known Kbps over edge caches only.
			serverTypes := toData.Get().ServerTypes
			kbpsStats := lastKbpsStats.Get()
			sum := float64(0.0)
			for cacheName, data := range kbpsStats.Caches {
				if serverTypes[cacheName] != enum.CacheTypeEdge {
					continue
				}
				sum += data.Kbps
			}
			body = []byte(fmt.Sprintf("%f", sum))
		default:
			err = fmt.Errorf("Unknown Request Type: %v", req.Type)
		}

		if err != nil {
			errorCount.Inc()
			log.Errorf("Request Error: %v\n", err)
		} else {
			req.Response <- body
		}

		// Close explicitly per request. A defer here would only run when this
		// function returns, leaving every response channel open and
		// accumulating deferred calls for the lifetime of the loop.
		close(req.Response)
	}
}
Beispiel #6
0
// addAvailableData records cache availability and reporting state on the
// delivery services in dsStats, and returns the updated dsStats.
//
// For each cache in crStates.Caches that has a cachegroup, a delivery service
// list and a server type, and for each of its delivery services present in
// dsStats:
//   - if the cache is available, the delivery service, its total, the cache's
//     cachegroup and the cache's type are marked available, and the
//     CachesAvailable counter is incremented;
//   - if the first entry of the cache's stat history has
//     PrecomputedData.Reporting set, the cache is marked as reporting.
//
// Caches or delivery services missing from any lookup are logged and skipped.
// The returned error is always nil.
func addAvailableData(dsStats Stats, crStates peer.Crstates, serverCachegroups map[enum.CacheName]enum.CacheGroupName, serverDs map[string][]string, serverTypes map[enum.CacheName]enum.CacheType, statHistory map[enum.CacheName][]cache.Result) (Stats, error) {
	for cache, available := range crStates.Caches {
		cacheName := enum.CacheName(cache)

		cacheGroup, found := serverCachegroups[cacheName]
		if !found {
			log.Warnf("CreateStats not adding availability data for '%s': not found in Cachegroups\n", cache)
			continue
		}
		deliveryServices, found := serverDs[cache]
		if !found {
			log.Warnf("CreateStats not adding availability data for '%s': not found in DeliveryServices\n", cache)
			continue
		}
		cacheType, found := serverTypes[cacheName]
		if !found {
			log.Warnf("CreateStats not adding availability data for '%s': not found in Server Types\n", cache)
			continue
		}

		for _, deliveryService := range deliveryServices {
			if deliveryService == "" {
				log.Errorf("EMPTY addAvailableData DS") // various bugs in other functions can cause this - this will help identify and debug them.
				continue
			}
			dsName := enum.DeliveryServiceName(deliveryService)

			stat, found := dsStats.DeliveryService[dsName]
			if !found {
				log.Warnf("CreateStats not adding availability data for '%s': not found in Stats\n", cache)
				continue // TODO log warning? Error?
			}

			if available.IsAvailable {
				stat.Common.IsAvailable.Value = true
				stat.Common.CachesAvailable.Value++
				stat.Total.IsAvailable.Value = true

				// Map values are copies, so each one is read, changed and stored back.
				groupStats := stat.CacheGroups[cacheGroup]
				groupStats.IsAvailable.Value = true
				stat.CacheGroups[cacheGroup] = groupStats

				typeStats := stat.Type[cacheType]
				typeStats.IsAvailable.Value = true
				stat.Type[cacheType] = typeStats
			}

			results, hasHistory := statHistory[cacheName]
			switch {
			case !hasHistory:
				log.Debugf("no result for %v %v\n", cache, deliveryService)
			case len(results) < 1:
				log.Warnf("no results %v %v\n", cache, deliveryService)
			case results[0].PrecomputedData.Reporting:
				stat.Common.CachesReporting[cacheName] = true
			default:
				log.Debugf("no reporting %v %v\n", cache, deliveryService)
			}

			dsStats.DeliveryService[dsName] = stat // TODO Necessary? Remove?
		}
	}
	return dsStats, nil
}
Beispiel #7
0
// addKbps adds Kbps fields to dsStats, based on the previously recorded out bytes in lastKbpsStats, and the time difference.
//
// Traffic Server only updates its data every N seconds. So, often we get a new Stats with the same OutBytes as the previous one.
// So, we must record the last changed value, and the time it changed. Then, if the new OutBytes is different from the previous,
// we set the (new - old) / (time since the last change) as the KBPS, and update the recorded time, bytes and Kbps.
// If OutBytes is unchanged, the previously recorded Kbps is reused and the recorded time is left alone.
//
// This is done per delivery service for each cachegroup, each cache type and the total, and then per cache
// using the first result in the cache's history with a non-zero OutBytes.
//
// Negative rates are logged and clamped to 0. The returned error is always nil.
// Both dsStats and lastKbpsStats contain maps which are updated in place as well as returned.
//
// NOTE(review): the cachegroup and cache type rates are not divided by BytesPerKbps, unlike the total and
// per-cache rates - confirm whether that is intended.
// TODO handle ATS byte rolling (when the `out_bytes` overflows back to 0)
func addKbps(statHistory map[enum.CacheName][]cache.Result, dsStats Stats, lastKbpsStats StatsLastKbps, dsStatsTime time.Time) (Stats, StatsLastKbps, error) {
	for dsName, stat := range dsStats.DeliveryService {
		lastKbpsStat, lastKbpsStatExists := lastKbpsStats.DeliveryServices[dsName]
		if !lastKbpsStatExists {
			lastKbpsStat = newStatLastKbps()
		}

		for cgName, cacheStats := range stat.CacheGroups {
			lastKbpsData := lastKbpsStat.CacheGroups[cgName]

			if cacheStats.OutBytes.Value == lastKbpsData.Bytes {
				// Bytes unchanged: reuse the last rate and keep the last-changed time.
				cacheStats.Kbps.Value = lastKbpsData.Kbps
				stat.CacheGroups[cgName] = cacheStats
				continue
			}

			if lastKbpsStatExists && lastKbpsData.Bytes != 0 {
				cacheStats.Kbps.Value = float64(cacheStats.OutBytes.Value-lastKbpsData.Bytes) / dsStatsTime.Sub(lastKbpsData.Time).Seconds()
			}

			if cacheStats.Kbps.Value < 0 {
				cacheStats.Kbps.Value = 0
				log.Errorf("addkbps negative cachegroup cacheStats.Kbps.Value: '%v' '%v' %v - %v / %v\n", dsName, cgName, cacheStats.OutBytes.Value, lastKbpsData.Bytes, dsStatsTime.Sub(lastKbpsData.Time).Seconds())
			}

			lastKbpsStat.CacheGroups[cgName] = LastKbpsData{Time: dsStatsTime, Bytes: cacheStats.OutBytes.Value, Kbps: cacheStats.Kbps.Value}
			stat.CacheGroups[cgName] = cacheStats
		}

		// Same scheme as the cachegroup loop above. The rate computation must
		// run when the byte count has CHANGED; it previously sat inside the
		// "bytes unchanged" branch and was unreachable.
		for cacheType, cacheStats := range stat.Type {
			lastKbpsData := lastKbpsStat.Type[cacheType]

			if cacheStats.OutBytes.Value == lastKbpsData.Bytes {
				if lastKbpsData.Kbps < 0 {
					log.Errorf("addkbps negative cachetype cacheStats.Kbps.Value!\n")
					lastKbpsData.Kbps = 0
				}
				cacheStats.Kbps.Value = lastKbpsData.Kbps
				stat.Type[cacheType] = cacheStats
				continue
			}

			if lastKbpsStatExists && lastKbpsData.Bytes != 0 {
				cacheStats.Kbps.Value = float64(cacheStats.OutBytes.Value-lastKbpsData.Bytes) / dsStatsTime.Sub(lastKbpsData.Time).Seconds()
			}

			if cacheStats.Kbps.Value < 0 {
				log.Errorf("addkbps negative cachetype cacheStats.Kbps.Value.\n")
				cacheStats.Kbps.Value = 0
			}

			lastKbpsStat.Type[cacheType] = LastKbpsData{Time: dsStatsTime, Bytes: cacheStats.OutBytes.Value, Kbps: cacheStats.Kbps.Value}
			stat.Type[cacheType] = cacheStats
		}

		totalChanged := lastKbpsStat.Total.Bytes != stat.Total.OutBytes.Value
		if lastKbpsStatExists && lastKbpsStat.Total.Bytes != 0 && totalChanged {
			stat.Total.Kbps.Value = float64(stat.Total.OutBytes.Value-lastKbpsStat.Total.Bytes) / dsStatsTime.Sub(lastKbpsStat.Total.Time).Seconds() / BytesPerKbps
			if stat.Total.Kbps.Value < 0 {
				stat.Total.Kbps.Value = 0
				log.Errorf("addkbps negative stat.Total.Kbps.Value! Deliveryservice '%v' %v - %v / %v\n", dsName, stat.Total.OutBytes.Value, lastKbpsStat.Total.Bytes, dsStatsTime.Sub(lastKbpsStat.Total.Time).Seconds())
			}
		} else {
			stat.Total.Kbps.Value = lastKbpsStat.Total.Kbps
		}

		if totalChanged {
			lastKbpsStat.Total = LastKbpsData{Time: dsStatsTime, Bytes: stat.Total.OutBytes.Value, Kbps: stat.Total.Kbps.Value}
		}

		lastKbpsStats.DeliveryServices[dsName] = lastKbpsStat
		dsStats.DeliveryService[dsName] = stat
	}

	for cacheName, results := range statHistory {
		var result *cache.Result
		for i := range results {
			// result.Errors can include stat errors where OutBytes was set correctly, so we look for the first non-zero OutBytes rather than the first errorless result
			// TODO add error classes to PrecomputedData, to distinguish stat errors from HTTP errors?
			if results[i].PrecomputedData.OutBytes == 0 {
				continue
			}
			result = &results[i] // point at the slice element, not at a loop variable
			break
		}

		if result == nil {
			log.Warnf("addkbps cache %v has no results\n", cacheName)
			continue
		}

		// Non-zero by construction: the loop above only selects results with OutBytes != 0.
		outBytes := result.PrecomputedData.OutBytes

		lastCacheKbpsData, ok := lastKbpsStats.Caches[cacheName]
		if !ok {
			// this means this is the first result for this cache - this is a normal condition
			lastKbpsStats.Caches[cacheName] = LastKbpsData{Time: dsStatsTime, Bytes: outBytes, Kbps: 0}
			continue
		}

		if lastCacheKbpsData.Bytes == outBytes {
			// this means this ATS hasn't updated its byte count yet - this is a normal condition
			continue // don't try to kbps, and importantly don't change the time of the last change, if Traffic Server hasn't updated
		}

		kbps := float64(outBytes-lastCacheKbpsData.Bytes) / result.Time.Sub(lastCacheKbpsData.Time).Seconds() / BytesPerKbps
		if lastCacheKbpsData.Bytes == 0 {
			kbps = 0
			log.Errorf("addkbps cache %v lastCacheKbpsData.Bytes zero\n", cacheName)
		}
		if kbps < 0 {
			log.Errorf("addkbps negative cache kbps: cache %v kbps %v outBytes %v lastCacheKbpsData.Bytes %v dsStatsTime %v lastCacheKbpsData.Time %v\n", cacheName, kbps, outBytes, lastCacheKbpsData.Bytes, dsStatsTime, lastCacheKbpsData.Time) // this is almost certainly a code bug. The only case this would ever be a data issue, would be if Traffic Server returned fewer bytes than previously.
			kbps = 0
		}

		lastKbpsStats.Caches[cacheName] = LastKbpsData{Time: result.Time, Bytes: outBytes, Kbps: kbps}
	}

	return dsStats, lastKbpsStats, nil
}