コード例 #1
0
// DataRequest takes an `http_server.DataRequest`, and the monitored data objects, and returns the appropriate response, and the status code.
func DataRequest(
	req http_server.DataRequest,
	opsConfig OpsConfigThreadsafe,
	toSession towrap.ITrafficOpsSession,
	localStates peer.CRStatesThreadsafe,
	peerStates peer.CRStatesPeersThreadsafe,
	combinedStates peer.CRStatesThreadsafe,
	statHistory StatHistoryThreadsafe,
	dsStats DSStatsThreadsafe,
	events EventsThreadsafe,
	staticAppData StaticAppData,
	healthPollInterval time.Duration,
	lastHealthDurations DurationMapThreadsafe,
	fetchCount UintThreadsafe,
	healthIteration UintThreadsafe,
	errorCount UintThreadsafe,
	toData todata.TODataThreadsafe,
	localCacheStatus CacheAvailableStatusThreadsafe,
	lastStats LastStatsThreadsafe,
	unpolledCaches UnpolledCachesThreadsafe,
) (body []byte, responseCode int) {

	// handleErr takes an error, and the request type it came from, and logs. It is ok to call with a nil error, in which case this is a no-op.
	handleErr := func(err error) {
		if err == nil {
			return
		}
		errorCount.Inc()
		log.Errorf("Request Error: %v\n", fmt.Errorf(req.Type.String()+": %v", err))
	}

	// commonReturn takes the body, err, and the data request Type which has been processed. It logs and deals with any error, and returns the appropriate bytes and response code for the `http_server`.
	commonReturn := func(body []byte, err error) ([]byte, int) {
		if err == nil {
			return body, http.StatusOK
		}
		handleErr(err)
		return nil, http.StatusInternalServerError
	}

	if unpolledCaches.Any() {
		handleErr(fmt.Errorf("service still starting, some caches unpolled"))
		return []byte("Service Unavailable"), http.StatusServiceUnavailable
	}

	var err error
	switch req.Type {
	case http_server.TRConfig:
		cdnName := opsConfig.Get().CdnName
		if toSession == nil {
			return commonReturn(nil, fmt.Errorf("Unable to connect to Traffic Ops"))
		}
		if cdnName == "" {
			return commonReturn(nil, fmt.Errorf("No CDN Configured"))
		}
		return commonReturn(body, err)
	case http_server.TRStateDerived:
		body, err = peer.CrstatesMarshall(combinedStates.Get())
		return commonReturn(body, err)
	case http_server.TRStateSelf:
		body, err = peer.CrstatesMarshall(localStates.Get())
		return commonReturn(body, err)
	case http_server.CacheStats:
		filter, err := NewCacheStatFilter(req.Parameters, toData.Get().ServerTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}
		body, err = cache.StatsMarshall(statHistory.Get(), filter, req.Parameters)
		return commonReturn(body, err)
	case http_server.DSStats:
		filter, err := NewDSStatFilter(req.Parameters, toData.Get().DeliveryServiceTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}
		body, err = json.Marshal(dsStats.Get().JSON(filter, req.Parameters)) // TODO marshall beforehand, for performance? (test to see how often requests are made)
		return commonReturn(body, err)
	case http_server.EventLog:
		body, err = json.Marshal(JSONEvents{Events: events.Get()})
		return commonReturn(body, err)
	case http_server.PeerStates:
		filter, err := NewPeerStateFilter(req.Parameters, toData.Get().ServerTypes)
		if err != nil {
			handleErr(err)
			return []byte(err.Error()), http.StatusBadRequest
		}

		body, err = json.Marshal(createApiPeerStates(peerStates.Get(), filter, req.Parameters))
		return commonReturn(body, err)
	case http_server.StatSummary:
		return nil, http.StatusNotImplemented
	case http_server.Stats:
		body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get())
		return commonReturn(body, err)
	case http_server.ConfigDoc:
		opsConfigCopy := opsConfig.Get()
		// if the password is blank, leave it blank, so callers can see it's missing.
		if opsConfigCopy.Password != "" {
			opsConfigCopy.Password = "******"
		}
		body, err = json.Marshal(opsConfigCopy)
		return commonReturn(body, err)
	case http_server.APICacheCount: // TODO determine if this should use peerStates
		return []byte(strconv.Itoa(len(localStates.Get().Caches))), http.StatusOK
	case http_server.APICacheAvailableCount:
		return []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches))), http.StatusOK
	case http_server.APICacheDownCount:
		return []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches))), http.StatusOK
	case http_server.APIVersion:
		s := "traffic_monitor-" + staticAppData.Version + "."
		if len(staticAppData.GitRevision) > 6 {
			s += staticAppData.GitRevision[:6]
		} else {
			s += staticAppData.GitRevision
		}
		return []byte(s), http.StatusOK
	case http_server.APITrafficOpsURI:
		return []byte(opsConfig.Get().Url), http.StatusOK
	case http_server.APICacheStates:
		body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(),
			lastHealthDurations.Get(), localStates.Get().Caches, lastStats.Get(), localCacheStatus))
		return commonReturn(body, err)
	case http_server.APIBandwidthKbps:
		serverTypes := toData.Get().ServerTypes
		kbpsStats := lastStats.Get()
		sum := float64(0.0)
		for cache, data := range kbpsStats.Caches {
			if serverTypes[cache] != enum.CacheTypeEdge {
				continue
			}
			sum += data.Bytes.PerSec / ds.BytesPerKilobit
		}
		return []byte(fmt.Sprintf("%f", sum)), http.StatusOK
	case http_server.APIBandwidthCapacityKbps:
		statHistory := statHistory.Get()
		cap := int64(0)
		for _, results := range statHistory {
			if len(results) == 0 {
				continue
			}
			cap += results[0].MaxKbps
		}
		return []byte(fmt.Sprintf("%d", cap)), http.StatusOK
	default:
		return commonReturn(nil, fmt.Errorf("Unknown Request Type"))
	}
}
コード例 #2
0
func dataRequestManagerListen(dr <-chan http_server.DataRequest, opsConfig OpsConfigThreadsafe, toSession towrap.ITrafficOpsSession, localStates peer.CRStatesThreadsafe, peerStates peer.CRStatesPeersThreadsafe, combinedStates peer.CRStatesThreadsafe, statHistory StatHistoryThreadsafe, dsStats DSStatsThreadsafe, events EventsThreadsafe, staticAppData StaticAppData, healthPollInterval time.Duration, lastHealthDurations DurationMapThreadsafe, fetchCount UintThreadsafe, healthIteration UintThreadsafe, errorCount UintThreadsafe, toData todata.TODataThreadsafe, localCacheStatus CacheAvailableStatusThreadsafe, lastKbpsStats StatsLastKbpsThreadsafe) {
	for {
		select {
		case req := <-dr:
			defer close(req.Response)

			var body []byte
			var err error

			switch req.Type {
			case http_server.TRConfig:
				cdnName := opsConfig.Get().CdnName
				if toSession == nil {
					err = fmt.Errorf("Unable to connect to Traffic Ops")
				} else if cdnName == "" {
					err = fmt.Errorf("No CDN Configured")
				} else {
					body, err = toSession.CRConfigRaw(cdnName)
				}
				if err != nil {
					err = fmt.Errorf("TR Config: %v", err)
				}
			case http_server.TRStateDerived:
				body, err = peer.CrstatesMarshall(combinedStates.Get())
				if err != nil {
					err = fmt.Errorf("TR State (derived): %v", err)
				}
			case http_server.TRStateSelf:
				body, err = peer.CrstatesMarshall(localStates.Get())
				if err != nil {
					err = fmt.Errorf("TR State (self): %v", err)
				}
			case http_server.CacheStats:
				// TODO: add support for ?hc=N query param, stats=, wildcard, individual caches
				// add pp and date to the json:
				/*
					pp: "0=[my-ats-edge-cache-1], hc=[1]",
					date: "Thu Oct 09 20:28:36 UTC 2014"
				*/
				params := req.Parameters
				hc := 1
				if _, exists := params["hc"]; exists {
					v, err := strconv.Atoi(params["hc"][0])
					if err == nil {
						hc = v
					}
				}
				body, err = cache.StatsMarshall(statHistory.Get(), hc)
				if err != nil {
					err = fmt.Errorf("CacheStats: %v", err)
				}
			case http_server.DSStats:
				body, err = json.Marshal(ds.StatsJSON(dsStats.Get())) // TODO marshall beforehand, for performance? (test to see how often requests are made)
				if err != nil {
					err = fmt.Errorf("DsStats: %v", err)
				}
			case http_server.EventLog:
				body, err = json.Marshal(JSONEvents{Events: events.Get()})
				if err != nil {
					err = fmt.Errorf("EventLog: %v", err)
				}
			case http_server.PeerStates:
				body, err = json.Marshal(createApiPeerStates(peerStates.Get()))
			case http_server.StatSummary:
				body = []byte("TODO implement")
			case http_server.Stats:
				body, err = getStats(staticAppData, healthPollInterval, lastHealthDurations.Get(), fetchCount.Get(), healthIteration.Get(), errorCount.Get())
				if err != nil {
					err = fmt.Errorf("Stats: %v", err)
				}
			case http_server.ConfigDoc:
				opsConfigCopy := opsConfig.Get()
				// if the password is blank, leave it blank, so callers can see it's missing.
				if opsConfigCopy.Password != "" {
					opsConfigCopy.Password = "******"
				}
				body, err = json.Marshal(opsConfigCopy)
				if err != nil {
					err = fmt.Errorf("Config Doc: %v", err)
				}
			case http_server.APICacheCount: // TODO determine if this should use peerStates
				body = []byte(strconv.Itoa(len(localStates.Get().Caches)))
			case http_server.APICacheAvailableCount:
				body = []byte(strconv.Itoa(cacheAvailableCount(localStates.Get().Caches)))
			case http_server.APICacheDownCount:
				body = []byte(strconv.Itoa(cacheDownCount(localStates.Get().Caches)))
			case http_server.APIVersion:
				s := "traffic_monitor-" + staticAppData.Version + "."
				if len(staticAppData.GitRevision) > 6 {
					s += staticAppData.GitRevision[:6]
				} else {
					s += staticAppData.GitRevision
				}
				body = []byte(s)
			case http_server.APITrafficOpsURI:
				body = []byte(opsConfig.Get().Url)
			case http_server.APICacheStates:
				body, err = json.Marshal(createCacheStatuses(toData.Get().ServerTypes, statHistory.Get(), lastHealthDurations.Get(), localStates.Get().Caches, lastKbpsStats.Get(), localCacheStatus))
			case http_server.APIBandwidthKbps:
				serverTypes := toData.Get().ServerTypes
				kbpsStats := lastKbpsStats.Get()
				sum := float64(0.0)
				for cache, data := range kbpsStats.Caches {
					if serverTypes[cache] != enum.CacheTypeEdge {
						continue
					}
					sum += data.Kbps
				}
				body = []byte(fmt.Sprintf("%f", sum))
			default:
				err = fmt.Errorf("Unknown Request Type: %v", req.Type)
			}

			if err != nil {
				errorCount.Inc()
				log.Errorf("Request Error: %v\n", err)
			} else {
				req.Response <- body
			}
		}
	}
}
コード例 #3
0
//
// Kicks off the pollers and handlers
//
func Start(opsConfigFile string, staticAppData StaticAppData) {
	var toSession *traffic_ops.Session

	fetchSuccessCounter := gmx.NewCounter("fetchSuccess")
	fetchFailCounter := gmx.NewCounter("fetchFail")
	fetchPendingGauge := gmx.NewGauge("fetchPending")

	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}

	sharedClient := http.Client{
		Timeout:   defaultHttpTimeout,
		Transport: tr,
	}

	cacheHealthConfigChannel := make(chan poller.HttpPollerConfig)
	cacheHealthChannel := make(chan cache.Result)
	cacheHealthTick := make(chan uint64)
	cacheHealthPoller := poller.HttpPoller{
		TickChan:      cacheHealthTick,
		ConfigChannel: cacheHealthConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultCacheHealthPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: cache.Handler{ResultChannel: cacheHealthChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	cacheStatConfigChannel := make(chan poller.HttpPollerConfig)
	cacheStatChannel := make(chan cache.Result)
	cacheStatPoller := poller.HttpPoller{
		ConfigChannel: cacheStatConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultCacheStatPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: cache.Handler{ResultChannel: cacheStatChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	sessionChannel := make(chan *traffic_ops.Session)
	monitorConfigChannel := make(chan traffic_ops.TrafficMonitorConfigMap)
	monitorOpsConfigChannel := make(chan handler.OpsConfig)
	monitorConfigPoller := poller.MonitorConfigPoller{
		Interval:         defaultMonitorConfigPollingInterval,
		SessionChannel:   sessionChannel,
		ConfigChannel:    monitorConfigChannel,
		OpsConfigChannel: monitorOpsConfigChannel,
	}

	opsConfigFileChannel := make(chan interface{})
	opsConfigFilePoller := poller.FilePoller{
		File:          opsConfigFile,
		ResultChannel: opsConfigFileChannel,
	}

	opsConfigChannel := make(chan handler.OpsConfig)
	opsConfigFileHandler := handler.OpsConfigFileHandler{
		ResultChannel:    opsConfigFilePoller.ResultChannel,
		OpsConfigChannel: opsConfigChannel,
	}

	peerConfigChannel := make(chan poller.HttpPollerConfig)
	peerChannel := make(chan peer.Result)
	peerPoller := poller.HttpPoller{
		ConfigChannel: peerConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultPeerPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: peer.Handler{ResultChannel: peerChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	go opsConfigFileHandler.Listen()
	go opsConfigFilePoller.Poll()
	go monitorConfigPoller.Poll()
	go cacheHealthPoller.Poll()
	go cacheStatPoller.Poll()
	go peerPoller.Poll()

	dr := make(chan http_server.DataRequest)

	healthHistory := make(map[string][]interface{})
	statHistory := make(map[string][]interface{})

	var opsConfig handler.OpsConfig
	var monitorConfig traffic_ops.TrafficMonitorConfigMap
	localStates := peer.Crstates{Caches: make(map[string]peer.IsAvailable), Deliveryservice: make(map[string]peer.Deliveryservice)}    // this is the local state as discoverer by this traffic_monitor
	peerStates := make(map[string]peer.Crstates)                                                                                       // each peer's last state is saved in this map
	combinedStates := peer.Crstates{Caches: make(map[string]peer.IsAvailable), Deliveryservice: make(map[string]peer.Deliveryservice)} // this is the result of combining the localStates and all the peerStates using the var ??

	deliveryServiceServers := map[string][]string{}
	serverTypes := map[string]string{}

	// TODO put stat data in a struct, for brevity
	lastHealthEndTimes := map[string]time.Time{}
	lastHealthDurations := map[string]time.Duration{}
	fetchCount := uint64(0) // note this is the number of individual caches fetched from, not the number of times all the caches were polled.
	healthIteration := uint64(0)
	errorCount := uint64(0)
	events := []Event{}
	eventIndex := uint64(0)
	for {
		select {
		case req := <-dr:
			defer close(req.C)

			var body []byte
			var err error

			switch req.T {
			case http_server.TR_CONFIG:
				if toSession != nil && opsConfig.CdnName != "" {
					body, err = toSession.CRConfigRaw(opsConfig.CdnName)
				}
			case http_server.TR_STATE_DERIVED:
				body, err = peer.CrStatesMarshall(combinedStates)
			case http_server.TR_STATE_SELF:
				body, err = peer.CrStatesMarshall(localStates)
			case http_server.CACHE_STATS:
				// TODO: add support for ?hc=N query param, stats=, wildcard, individual caches
				// add pp and date to the json:
				/*
					pp: "0=[my-ats-edge-cache-1], hc=[1]",
					date: "Thu Oct 09 20:28:36 UTC 2014"
				*/
				params := req.Parameters
				hc := 1
				if _, exists := params["hc"]; exists {
					v, err := strconv.Atoi(params["hc"][0])
					if err == nil {
						hc = v
					}
				}
				body, err = cache.StatsMarshall(statHistory, hc)
			case http_server.DS_STATS:
				body = []byte("TODO implement")
			case http_server.EVENT_LOG:
				body, err = json.Marshal(JSONEvents{Events: events})
			case http_server.PEER_STATES:
				body = []byte("TODO implement")
			case http_server.STAT_SUMMARY:
				body = []byte("TODO implement")
			case http_server.STATS:
				body, err = getStats(staticAppData, cacheHealthPoller.Config.Interval, lastHealthDurations, fetchCount, healthIteration, errorCount)
				if err != nil {
					// TODO send error to client
					errorCount++
					log.Printf("ERROR getting stats %v\n", err)
					continue
				}
			case http_server.CONFIG_DOC:
				opsConfigCopy := opsConfig
				// if the password is blank, leave it blank, so callers can see it's missing.
				if opsConfigCopy.Password != "" {
					opsConfigCopy.Password = "******"
				}
				body, err = json.Marshal(opsConfigCopy)
			default:
				body = []byte("TODO error message")
			}
			req.C <- body
		case oc := <-opsConfigFileHandler.OpsConfigChannel:
			var err error
			opsConfig = oc

			listenAddress := ":80" // default

			if opsConfig.HttpListener != "" {
				listenAddress = opsConfig.HttpListener
			}

			err = http_server.Run(dr, listenAddress)
			if err != nil {
				errorCount++
				log.Printf("MonitorConfigPoller: error creating HTTP server: %s\n", err)
				continue
			}

			toSession, err = traffic_ops.Login(opsConfig.Url, opsConfig.Username, opsConfig.Password, opsConfig.Insecure)
			if err != nil {
				errorCount++
				log.Printf("MonitorConfigPoller: error instantiating Session with traffic_ops: %s\n", err)
				continue
			}

			deliveryServiceServers, err = getDeliveryServiceServers(toSession, opsConfig.CdnName)
			if err != nil {
				errorCount++
				log.Printf("Error getting delivery service servers from Traffic Ops: %v\n", err)
				continue
			}

			serverTypes, err = getServerTypes(toSession, opsConfig.CdnName)
			if err != nil {
				errorCount++
				log.Printf("Error getting server types from Traffic Ops: %v\n", err)
				continue
			}

			// This must be in a goroutine, because the monitorConfigPoller tick sends to a channel this select listens for. Thus, if we block on sends to the monitorConfigPoller, we have a livelock race condition.
			go func() {
				monitorConfigPoller.OpsConfigChannel <- opsConfig // this is needed for cdnName
				monitorConfigPoller.SessionChannel <- toSession
			}()
		case monitorConfig = <-monitorConfigPoller.ConfigChannel:
			healthUrls := map[string]string{}
			statUrls := map[string]string{}
			peerUrls := map[string]string{}
			caches := map[string]string{}

			for _, srv := range monitorConfig.TrafficServer {
				caches[srv.HostName] = srv.Status

				if srv.Status == "ONLINE" {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: true}
					continue
				}
				if srv.Status == "OFFLINE" {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: false}
					continue
				}
				// seed states with available = false until our polling cycle picks up a result
				if _, exists := localStates.Caches[srv.HostName]; !exists {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: false}
				}

				url := monitorConfig.Profile[srv.Profile].Parameters.HealthPollingURL
				r := strings.NewReplacer(
					"${hostname}", srv.FQDN,
					"${interface_name}", srv.InterfaceName,
					"application=system", "application=plugin.remap",
					"application=", "application=plugin.remap",
				)
				url = r.Replace(url)
				healthUrls[srv.HostName] = url
				r = strings.NewReplacer("application=plugin.remap", "application=")
				url = r.Replace(url)
				statUrls[srv.HostName] = url
			}

			for _, srv := range monitorConfig.TrafficMonitor {
				if srv.Status != "ONLINE" {
					continue
				}
				// TODO: the URL should be config driven. -jse
				url := fmt.Sprintf("http://%s:%d/publish/CrStates?raw", srv.IP, srv.Port)
				peerUrls[srv.HostName] = url
			}

			cacheStatPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: statUrls, Interval: defaultCacheStatPollingInterval}
			cacheHealthPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: healthUrls, Interval: defaultCacheHealthPollingInterval}
			peerPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: peerUrls, Interval: defaultPeerPollingInterval}

			for k := range localStates.Caches {
				_, exists := monitorConfig.TrafficServer[k]

				if !exists {
					fmt.Printf("Warning: removing %s from localStates", k)
					delete(localStates.Caches, k)
				}
			}

			addStateDeliveryServices(monitorConfig, localStates.Deliveryservice)
		case i := <-cacheHealthTick:
			healthIteration = i
		case healthResult := <-cacheHealthChannel:
			fetchCount++
			var prevResult cache.Result
			if len(healthHistory[healthResult.Id]) != 0 {
				prevResult = healthHistory[healthResult.Id][len(healthHistory[healthResult.Id])-1].(cache.Result)
			}
			health.GetVitals(&healthResult, &prevResult, &monitorConfig)
			healthHistory[healthResult.Id] = pruneHistory(append(healthHistory[healthResult.Id], healthResult), defaultMaxHistory)
			isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfig)
			if localStates.Caches[healthResult.Id].IsAvailable != isAvailable {
				fmt.Println("Changing state for", healthResult.Id, " was:", prevResult.Available, " is now:", isAvailable, " because:", whyAvailable, " errors:", healthResult.Errors)
				e := Event{
					Index:       eventIndex,
					Time:        time.Now().Unix(),
					Description: whyAvailable,
					Name:        healthResult.Id,
					Hostname:    healthResult.Id,
					Type:        serverTypes[healthResult.Id],
					Available:   isAvailable,
				}
				events = append([]Event{e}, events...)
				if len(events) > maxEvents {
					events = events[:maxEvents-1]
				}
				eventIndex++
			}
			localStates.Caches[healthResult.Id] = peer.IsAvailable{IsAvailable: isAvailable}
			calculateDeliveryServiceState(deliveryServiceServers, localStates.Caches, localStates.Deliveryservice)

			if lastHealthStart, ok := lastHealthEndTimes[healthResult.Id]; ok {
				lastHealthDurations[healthResult.Id] = time.Since(lastHealthStart)
			}
			lastHealthEndTimes[healthResult.Id] = time.Now()

			// if _, ok := queryIntervalStart[pollI]; !ok {
			// 	log.Printf("ERROR poll start index not found")
			// 	continue
			// }
			// lastQueryIntervalTime = time.Since(queryIntervalStart[pollI])
		case stats := <-cacheStatChannel:
			statHistory[stats.Id] = pruneHistory(append(statHistory[stats.Id], stats), defaultMaxHistory)
		case crStatesResult := <-peerChannel:
			peerStates[crStatesResult.Id] = crStatesResult.PeerStats
			combinedStates = combineCrStates(peerStates, localStates)
		}
	}
}