Example #1
func getServerTypes(to *traffic_ops.Session, cdn string) (map[string]string, error) {
	// This is efficient (with getDeliveryServiceServers) because the traffic_ops client caches its result.
	// Were that not the case, these functions could be refactored to only call traffic_ops.Session.CRConfigRaw() once.

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, err
	}

	type CrConfig struct {
		ContentServers map[string]struct {
			Type string `json:"type"`
		} `json:"contentServers"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, err
	}

	serverTypes := map[string]string{}
	for serverName, serverData := range crc.ContentServers {
		serverTypes[serverName] = serverData.Type
	}
	return serverTypes, nil
}
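
A minimal usage sketch, not part of the original example: it assumes the traffic_ops.Login signature shown in Example #7, and the URL, credentials, CDN name, and the helper name printCDNTopology are placeholders. It calls getServerTypes together with getDeliveryServiceServers (Example #5) on the same session, which is where the CRConfig caching mentioned in the comment above pays off.

// printCDNTopology is a hypothetical helper showing how these functions might be combined.
func printCDNTopology() error {
	// Placeholder Traffic Ops URL, credentials, and CDN name.
	to, err := traffic_ops.Login("https://to.example.net", "admin", "password", true)
	if err != nil {
		return fmt.Errorf("logging into Traffic Ops: %v", err)
	}
	serverTypes, err := getServerTypes(to, "cdn-example")
	if err != nil {
		return fmt.Errorf("getting server types: %v", err)
	}
	// Second call on the same session; per the comment above, the client caches
	// the CRConfig, so this should not refetch it.
	dsServers, serverDs, err := getDeliveryServiceServers(to, "cdn-example")
	if err != nil {
		return fmt.Errorf("getting delivery service servers: %v", err)
	}
	fmt.Printf("%d servers, %d delivery services, %d server-to-DS mappings\n", len(serverTypes), len(dsServers), len(serverDs))
	return nil
}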
Example #2
func getDeliveryServiceTypes(to *traffic_ops.Session, cdn string) (map[string]deliveryservicestats.DsStatType, error) {
	dsTypes := map[string]deliveryservicestats.DsStatType{}

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, err
	}
	type CrConfig struct {
		DeliveryServices map[string]struct {
			Matchsets []struct {
				Protocol string `json:"protocol"`
			} `json:"matchsets"`
		} `json:"deliveryServices"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, fmt.Errorf("Error unmarshalling CRConfig: %v", err)
	}

	for dsName, dsData := range crc.DeliveryServices {
		if len(dsData.Matchsets) < 1 {
			return nil, fmt.Errorf("CRConfig missing protocol for '%s'", dsName)
		}
		dsTypeStr := dsData.Matchsets[0].Protocol
		dsType := deliveryservicestats.DsStatTypeFromString(dsTypeStr)
		if dsType == deliveryservicestats.DsStatTypeInvalid {
			return nil, fmt.Errorf("CRConfig unknowng protocol for '%s': '%s'", dsName, dsTypeStr)
		}
		dsTypes[dsName] = dsType
	}
	return dsTypes, nil
}
Example #3
// getServerTypes gets the cache type of each ATS Edge+Mid Cache server, for the given CDN, from Traffic Ops.
func getServerTypes(to *traffic_ops.Session, cdn string) (map[string]deliveryservicestats.DsStatCacheType, error) {
	serverTypes := map[string]deliveryservicestats.DsStatCacheType{}

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, err
	}
	type CrConfig struct {
		ContentServers map[string]struct {
			Type string `json:"type"`
		} `json:"contentServers"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, err
	}

	for server, serverData := range crc.ContentServers {
		t := deliveryservicestats.DsStatCacheTypeFromString(serverData.Type)
		if t == deliveryservicestats.DsStatCacheTypeInvalid {
			return nil, fmt.Errorf("getServerTypes CRConfig unknown type for '%s': '%s'", server, serverData.Type)
		}
		serverTypes[server] = t
	}
	return serverTypes, nil
}
Example #4
// getDeliveryServiceRegexes gets the regexes of each delivery service, for the given CDN, from Traffic Ops.
// Returns a map[deliveryService][]regex.
func getDeliveryServiceRegexes(to *traffic_ops.Session, cdn string) (map[string][]string, error) {
	dsRegexes := map[string][]string{}

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, err
	}
	type CrConfig struct {
		DeliveryServices map[string]struct {
			Matchsets []struct {
				MatchList []struct {
					Regex string `json:"regex"`
				} `json:"matchlist"`
			} `json:"matchsets"`
		} `json:"deliveryServices"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, err
	}

	for dsName, dsData := range crc.DeliveryServices {
		if len(dsData.Matchsets) < 1 {
			return nil, fmt.Errorf("CRConfig missing regex for '%s'", dsName)
		}
		for _, matchset := range dsData.Matchsets {
			if len(matchset.MatchList) < 1 {
				return nil, fmt.Errorf("CRConfig missing Regex for '%s'", dsName)
			}
			dsRegexes[dsName] = append(dsRegexes[dsName], matchset.MatchList[0].Regex)
		}
	}
	return dsRegexes, nil
}
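
The returned values are plain regex strings. The sketch below, which is an illustration rather than part of the original code, shows one way they might be matched against a request hostname using the standard library regexp package; treating these patterns as host regexes is an assumption not stated in this excerpt, and matchDeliveryService is a made-up name.

// matchDeliveryService is a hypothetical helper: it returns a delivery service
// whose regex matches the given hostname, using the map returned by
// getDeliveryServiceRegexes above. Map iteration order is not deterministic,
// so if multiple delivery services match, which one is returned is arbitrary.
func matchDeliveryService(dsRegexes map[string][]string, hostname string) (string, bool) {
	for dsName, patterns := range dsRegexes {
		for _, pattern := range patterns {
			re, err := regexp.Compile(pattern)
			if err != nil {
				continue // skip patterns that fail to compile
			}
			if re.MatchString(hostname) {
				return dsName, true
			}
		}
	}
	return "", false
}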
Example #5
// getDeliveryServiceServers gets the servers assigned to each delivery service, for the given CDN, from Traffic Ops.
// Returns a map[deliveryService][]server, and a map[server]deliveryService
func getDeliveryServiceServers(to *traffic_ops.Session, cdn string) (map[string][]string, map[string]string, error) {
	dsServers := map[string][]string{}
	serverDs := map[string]string{}

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, nil, err
	}
	type CrConfig struct {
		ContentServers map[string]struct {
			DeliveryServices map[string][]string `json:"deliveryServices"`
		} `json:"contentServers"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, nil, err
	}

	for serverName, serverData := range crc.ContentServers {
		for deliveryServiceName := range serverData.DeliveryServices {
			dsServers[deliveryServiceName] = append(dsServers[deliveryServiceName], serverName)
			serverDs[serverName] = deliveryServiceName
		}
	}
	return dsServers, serverDs, nil
}
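
Example #7 feeds the first returned map into calculateDeliveryServiceState, which is not shown in this excerpt. The sketch below is a simplified, hypothetical stand-in using one plausible rule (a delivery service is available if at least one of its servers is available); the real function may apply a different rule, and the name deliveryServiceAvailable is made up.

// deliveryServiceAvailable is a hypothetical, simplified stand-in for the
// calculateDeliveryServiceState call in Example #7. It marks a delivery service
// available if any of its assigned servers is available.
func deliveryServiceAvailable(dsServers map[string][]string, cacheStates map[string]peer.IsAvailable) map[string]bool {
	dsAvailable := map[string]bool{}
	for dsName, servers := range dsServers {
		dsAvailable[dsName] = false
		for _, server := range servers {
			if state, ok := cacheStates[server]; ok && state.IsAvailable {
				dsAvailable[dsName] = true
				break
			}
		}
	}
	return dsAvailable
}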
Example #6
// getServerCachegroups gets the cachegroup of each ATS Edge+Mid Cache server, for the given CDN, from Traffic Ops.
// Returns a map[server]cachegroup.
func getServerCachegroups(to *traffic_ops.Session, cdn string) (map[string]string, error) {
	serverCachegroups := map[string]string{}

	crcData, err := to.CRConfigRaw(cdn)
	if err != nil {
		return nil, err
	}
	type CrConfig struct {
		ContentServers map[string]struct {
			CacheGroup string `json:"cacheGroup"`
		} `json:"contentServers"`
	}
	var crc CrConfig
	if err := json.Unmarshal(crcData, &crc); err != nil {
		return nil, err
	}

	for server, serverData := range crc.ContentServers {
		serverCachegroups[server] = serverData.CacheGroup
	}
	return serverCachegroups, nil
}
Example #7
// Start kicks off the pollers and handlers.
func Start(opsConfigFile string, staticAppData StaticAppData) {
	var toSession *traffic_ops.Session

	fetchSuccessCounter := gmx.NewCounter("fetchSuccess")
	fetchFailCounter := gmx.NewCounter("fetchFail")
	fetchPendingGauge := gmx.NewGauge("fetchPending")

	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}

	sharedClient := http.Client{
		Timeout:   defaultHttpTimeout,
		Transport: tr,
	}

	cacheHealthConfigChannel := make(chan poller.HttpPollerConfig)
	cacheHealthChannel := make(chan cache.Result)
	cacheHealthTick := make(chan uint64)
	cacheHealthPoller := poller.HttpPoller{
		TickChan:      cacheHealthTick,
		ConfigChannel: cacheHealthConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultCacheHealthPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: cache.Handler{ResultChannel: cacheHealthChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	cacheStatConfigChannel := make(chan poller.HttpPollerConfig)
	cacheStatChannel := make(chan cache.Result)
	cacheStatPoller := poller.HttpPoller{
		ConfigChannel: cacheStatConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultCacheStatPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: cache.Handler{ResultChannel: cacheStatChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	sessionChannel := make(chan *traffic_ops.Session)
	monitorConfigChannel := make(chan traffic_ops.TrafficMonitorConfigMap)
	monitorOpsConfigChannel := make(chan handler.OpsConfig)
	monitorConfigPoller := poller.MonitorConfigPoller{
		Interval:         defaultMonitorConfigPollingInterval,
		SessionChannel:   sessionChannel,
		ConfigChannel:    monitorConfigChannel,
		OpsConfigChannel: monitorOpsConfigChannel,
	}

	opsConfigFileChannel := make(chan interface{})
	opsConfigFilePoller := poller.FilePoller{
		File:          opsConfigFile,
		ResultChannel: opsConfigFileChannel,
	}

	opsConfigChannel := make(chan handler.OpsConfig)
	opsConfigFileHandler := handler.OpsConfigFileHandler{
		ResultChannel:    opsConfigFilePoller.ResultChannel,
		OpsConfigChannel: opsConfigChannel,
	}

	peerConfigChannel := make(chan poller.HttpPollerConfig)
	peerChannel := make(chan peer.Result)
	peerPoller := poller.HttpPoller{
		ConfigChannel: peerConfigChannel,
		Config: poller.HttpPollerConfig{
			Interval: defaultPeerPollingInterval,
		},
		Fetcher: fetcher.HttpFetcher{
			Handler: peer.Handler{ResultChannel: peerChannel},
			Client:  sharedClient,
			Success: fetchSuccessCounter,
			Fail:    fetchFailCounter,
			Pending: fetchPendingGauge,
		},
	}

	go opsConfigFileHandler.Listen()
	go opsConfigFilePoller.Poll()
	go monitorConfigPoller.Poll()
	go cacheHealthPoller.Poll()
	go cacheStatPoller.Poll()
	go peerPoller.Poll()

	dr := make(chan http_server.DataRequest)

	healthHistory := make(map[string][]interface{})
	statHistory := make(map[string][]interface{})

	var opsConfig handler.OpsConfig
	var monitorConfig traffic_ops.TrafficMonitorConfigMap
	localStates := peer.Crstates{Caches: make(map[string]peer.IsAvailable), Deliveryservice: make(map[string]peer.Deliveryservice)}    // this is the local state as discovered by this traffic_monitor
	peerStates := make(map[string]peer.Crstates)                                                                                       // each peer's last state is saved in this map
	combinedStates := peer.Crstates{Caches: make(map[string]peer.IsAvailable), Deliveryservice: make(map[string]peer.Deliveryservice)} // this is the result of combining the localStates and all the peerStates using the var ??

	deliveryServiceServers := map[string][]string{}
	serverTypes := map[string]string{}

	// TODO put stat data in a struct, for brevity
	lastHealthEndTimes := map[string]time.Time{}
	lastHealthDurations := map[string]time.Duration{}
	fetchCount := uint64(0) // note this is the number of individual caches fetched from, not the number of times all the caches were polled.
	healthIteration := uint64(0)
	errorCount := uint64(0)
	events := []Event{}
	eventIndex := uint64(0)
	for {
		select {
		case req := <-dr:
			defer close(req.C)

			var body []byte
			var err error

			switch req.T {
			case http_server.TR_CONFIG:
				if toSession != nil && opsConfig.CdnName != "" {
					body, err = toSession.CRConfigRaw(opsConfig.CdnName)
				}
			case http_server.TR_STATE_DERIVED:
				body, err = peer.CrStatesMarshall(combinedStates)
			case http_server.TR_STATE_SELF:
				body, err = peer.CrStatesMarshall(localStates)
			case http_server.CACHE_STATS:
				// TODO: add support for ?hc=N query param, stats=, wildcard, individual caches
				// add pp and date to the json:
				/*
					pp: "0=[my-ats-edge-cache-1], hc=[1]",
					date: "Thu Oct 09 20:28:36 UTC 2014"
				*/
				params := req.Parameters
				hc := 1
				if _, exists := params["hc"]; exists {
					v, err := strconv.Atoi(params["hc"][0])
					if err == nil {
						hc = v
					}
				}
				body, err = cache.StatsMarshall(statHistory, hc)
			case http_server.DS_STATS:
				body = []byte("TODO implement")
			case http_server.EVENT_LOG:
				body, err = json.Marshal(JSONEvents{Events: events})
			case http_server.PEER_STATES:
				body = []byte("TODO implement")
			case http_server.STAT_SUMMARY:
				body = []byte("TODO implement")
			case http_server.STATS:
				body, err = getStats(staticAppData, cacheHealthPoller.Config.Interval, lastHealthDurations, fetchCount, healthIteration, errorCount)
				if err != nil {
					// TODO send error to client
					errorCount++
					log.Printf("ERROR getting stats %v\n", err)
					continue
				}
			case http_server.CONFIG_DOC:
				opsConfigCopy := opsConfig
				// if the password is blank, leave it blank, so callers can see it's missing.
				if opsConfigCopy.Password != "" {
					opsConfigCopy.Password = "******"
				}
				body, err = json.Marshal(opsConfigCopy)
			default:
				body = []byte("TODO error message")
			}
			req.C <- body
		case oc := <-opsConfigFileHandler.OpsConfigChannel:
			var err error
			opsConfig = oc

			listenAddress := ":80" // default

			if opsConfig.HttpListener != "" {
				listenAddress = opsConfig.HttpListener
			}

			err = http_server.Run(dr, listenAddress)
			if err != nil {
				errorCount++
				log.Printf("MonitorConfigPoller: error creating HTTP server: %s\n", err)
				continue
			}

			toSession, err = traffic_ops.Login(opsConfig.Url, opsConfig.Username, opsConfig.Password, opsConfig.Insecure)
			if err != nil {
				errorCount++
				log.Printf("MonitorConfigPoller: error instantiating Session with traffic_ops: %s\n", err)
				continue
			}

			deliveryServiceServers, err = getDeliveryServiceServers(toSession, opsConfig.CdnName)
			if err != nil {
				errorCount++
				log.Printf("Error getting delivery service servers from Traffic Ops: %v\n", err)
				continue
			}

			serverTypes, err = getServerTypes(toSession, opsConfig.CdnName)
			if err != nil {
				errorCount++
				log.Printf("Error getting server types from Traffic Ops: %v\n", err)
				continue
			}

			// This must be in a goroutine, because the monitorConfigPoller tick sends to a channel this select listens for. Thus, if we block on sends to the monitorConfigPoller, we have a livelock race condition.
			go func() {
				monitorConfigPoller.OpsConfigChannel <- opsConfig // this is needed for cdnName
				monitorConfigPoller.SessionChannel <- toSession
			}()
		case monitorConfig = <-monitorConfigPoller.ConfigChannel:
			healthUrls := map[string]string{}
			statUrls := map[string]string{}
			peerUrls := map[string]string{}
			caches := map[string]string{}

			for _, srv := range monitorConfig.TrafficServer {
				caches[srv.HostName] = srv.Status

				if srv.Status == "ONLINE" {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: true}
					continue
				}
				if srv.Status == "OFFLINE" {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: false}
					continue
				}
				// seed states with available = false until our polling cycle picks up a result
				if _, exists := localStates.Caches[srv.HostName]; !exists {
					localStates.Caches[srv.HostName] = peer.IsAvailable{IsAvailable: false}
				}

				url := monitorConfig.Profile[srv.Profile].Parameters.HealthPollingURL
				r := strings.NewReplacer(
					"${hostname}", srv.FQDN,
					"${interface_name}", srv.InterfaceName,
					"application=system", "application=plugin.remap",
					"application=", "application=plugin.remap",
				)
				url = r.Replace(url)
				healthUrls[srv.HostName] = url
				r = strings.NewReplacer("application=plugin.remap", "application=")
				url = r.Replace(url)
				statUrls[srv.HostName] = url
			}

			for _, srv := range monitorConfig.TrafficMonitor {
				if srv.Status != "ONLINE" {
					continue
				}
				// TODO: the URL should be config driven. -jse
				url := fmt.Sprintf("http://%s:%d/publish/CrStates?raw", srv.IP, srv.Port)
				peerUrls[srv.HostName] = url
			}

			cacheStatPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: statUrls, Interval: defaultCacheStatPollingInterval}
			cacheHealthPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: healthUrls, Interval: defaultCacheHealthPollingInterval}
			peerPoller.ConfigChannel <- poller.HttpPollerConfig{Urls: peerUrls, Interval: defaultPeerPollingInterval}

			for k := range localStates.Caches {
				_, exists := monitorConfig.TrafficServer[k]

				if !exists {
					fmt.Printf("Warning: removing %s from localStates", k)
					delete(localStates.Caches, k)
				}
			}

			addStateDeliveryServices(monitorConfig, localStates.Deliveryservice)
		case i := <-cacheHealthTick:
			healthIteration = i
		case healthResult := <-cacheHealthChannel:
			fetchCount++
			var prevResult cache.Result
			if len(healthHistory[healthResult.Id]) != 0 {
				prevResult = healthHistory[healthResult.Id][len(healthHistory[healthResult.Id])-1].(cache.Result)
			}
			health.GetVitals(&healthResult, &prevResult, &monitorConfig)
			healthHistory[healthResult.Id] = pruneHistory(append(healthHistory[healthResult.Id], healthResult), defaultMaxHistory)
			isAvailable, whyAvailable := health.EvalCache(healthResult, &monitorConfig)
			if localStates.Caches[healthResult.Id].IsAvailable != isAvailable {
				fmt.Println("Changing state for", healthResult.Id, " was:", prevResult.Available, " is now:", isAvailable, " because:", whyAvailable, " errors:", healthResult.Errors)
				e := Event{
					Index:       eventIndex,
					Time:        time.Now().Unix(),
					Description: whyAvailable,
					Name:        healthResult.Id,
					Hostname:    healthResult.Id,
					Type:        serverTypes[healthResult.Id],
					Available:   isAvailable,
				}
				events = append([]Event{e}, events...)
				if len(events) > maxEvents {
					events = events[:maxEvents-1]
				}
				eventIndex++
			}
			localStates.Caches[healthResult.Id] = peer.IsAvailable{IsAvailable: isAvailable}
			calculateDeliveryServiceState(deliveryServiceServers, localStates.Caches, localStates.Deliveryservice)

			if lastHealthStart, ok := lastHealthEndTimes[healthResult.Id]; ok {
				lastHealthDurations[healthResult.Id] = time.Since(lastHealthStart)
			}
			lastHealthEndTimes[healthResult.Id] = time.Now()

			// if _, ok := queryIntervalStart[pollI]; !ok {
			// 	log.Printf("ERROR poll start index not found")
			// 	continue
			// }
			// lastQueryIntervalTime = time.Since(queryIntervalStart[pollI])
		case stats := <-cacheStatChannel:
			statHistory[stats.Id] = pruneHistory(append(statHistory[stats.Id], stats), defaultMaxHistory)
		case crStatesResult := <-peerChannel:
			peerStates[crStatesResult.Id] = crStatesResult.PeerStats
			combinedStates = combineCrStates(peerStates, localStates)
		}
	}
}
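
A hypothetical entry point for Start, added here for illustration only: the package layout, flag name, and the fields of StaticAppData are not shown in this excerpt, so the flag name is made up and StaticAppData is passed zero-valued.

// Hypothetical main: parses the ops config path from a flag and hands it to Start.
// StaticAppData is zero-valued because its fields are not shown in this excerpt.
func main() {
	opsConfigFile := flag.String("opsCfg", "traffic_ops.cfg", "path to the Traffic Ops config file")
	flag.Parse()
	Start(*opsConfigFile, StaticAppData{})
}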