func watchFaes() {
	ch := make(chan []string, 10)
	go etclib.WatchService(etclib.SERVICE_FAE, ch)

	for {
		select {
		case <-ch:
			endpoints, err := etclib.ServiceEndpoints(etclib.SERVICE_FAE)
			if err == nil {
				log.Trace("fae endpoints updated: %+v", endpoints)

				dumpFaeConfigPhp(endpoints)
			} else {
				log.Error("fae: %s", err)
			}
		}
	}

	log.Warn("fae watcher died")
}
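dumpFaeConfigPhp is defined elsewhere in the project and is not shown here. The sketch below is only a hypothetical illustration of how a watcher like this could hand the refreshed endpoint list to PHP clients through a config file; the function name, file names, PHP layout and error handling are all assumptions, and it uses only the standard bytes, fmt and os packages.

// Hypothetical endpoint-to-PHP-config writer, NOT fae's actual dumpFaeConfigPhp:
// render the endpoints as a PHP array, then swap the file in with a rename.
func dumpFaeConfigPhpSketch(endpoints []string) error {
	var buf bytes.Buffer
	buf.WriteString("<?php\nreturn array(\n")
	for _, ep := range endpoints {
		fmt.Fprintf(&buf, "    '%s',\n", ep)
	}
	buf.WriteString(");\n")

	tmp := "fae.servers.php.tmp" // assumed file names
	if err := os.WriteFile(tmp, buf.Bytes(), 0644); err != nil {
		return err
	}
	return os.Rename(tmp, "fae.servers.php")
}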
func (this *Proxy) StartMonitorCluster() {
	if !this.Enabled() {
		log.Warn("servant proxy disabled by proxy config section")
		return
	}

	peersChan := make(chan []string, 10)
	go etclib.WatchService(etclib.SERVICE_FAE, peersChan)

	for {
		select {
		case <-peersChan:
			peers, err := etclib.ServiceEndpoints(etclib.SERVICE_FAE)
			if err == nil {
				if !this.clusterTopologyReady {
					this.clusterTopologyReady = true
					close(this.clusterTopologyChan)
				}

				if len(peers) == 0 {
					// TODO panic?
					log.Warn("Empty cluster fae peers")
				} else {
					// no lock, because running within 1 goroutine
					this.selector.SetPeersAddr(peers...)
					this.refreshPeers(peers)

					log.Info("Cluster latest fae nodes: %+v", peers)
				}
			} else {
				log.Error("Cluster peers: %s", err)
			}
		}
	}

	// should never get here
	log.Warn("Cluster peers monitor died")
}
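The first notification closes this.clusterTopologyChan, so other goroutines can block until the proxy has seen the cluster at least once; receiving from a closed channel returns immediately, so later callers never wait. A minimal caller-side sketch, assuming clusterTopologyChan is the channel closed above (the method name and the timeout handling are assumptions; it needs fmt and time):

// Hypothetical helper: wait for StartMonitorCluster to observe the first
// peer list, or give up after a timeout.
func (this *Proxy) awaitClusterTopology(timeout time.Duration) error {
	select {
	case <-this.clusterTopologyChan:
		return nil
	case <-time.After(timeout):
		return fmt.Errorf("fae cluster topology not ready after %s", timeout)
	}
}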
func (g *graph) write(w io.Writer) {
	g.mu.Lock()
	defer g.mu.Unlock()

	// collect every fae dashboard URL in the cluster
	g.Peers = make([]string, 0, 10)
	if peers, err := etclib.ServiceEndpoints(etclib.SERVICE_FAE); err == nil && len(peers) > 0 {
		for _, peer := range peers {
			host, _, _ := net.SplitHostPort(peer)
			g.Peers = append(g.Peers, host+":"+g.port)
		}
	}

	if len(g.Qps) > (60 * 60 * 12 / 10) { // 12 hours of samples at one point per 10s
		// drop the history so the dashboard never uses up fae's memory
		g.Qps = []graphPoints{}
		g.Latencies = []graphPoints{}
		g.ActiveSessions = []graphPoints{}
		g.Errors = []graphPoints{}
		g.Slows = []graphPoints{}
		g.HeapAlloc = []graphPoints{}
		g.HeapReleased = []graphPoints{}
		g.HeapSys = []graphPoints{}
		g.NumGC = []graphPoints{}
		g.StackInUse = []graphPoints{}
		g.HeapObjects = []graphPoints{}
		g.GcPause100 = []graphPoints{}
		g.GcPause99 = []graphPoints{}
		g.GcPause95 = []graphPoints{}
		g.GcPause75 = []graphPoints{}
	}

	ts := int(time.Now().UnixNano() / 1e6) // millisecond timestamp for the x axis
	g.Qps = append(g.Qps, graphPoints{ts, int(g.rpcServer.stats.CallPerSecond.Rate1())})
	g.ActiveSessions = append(g.ActiveSessions, graphPoints{ts, int(g.rpcServer.activeSessionN)})
	g.Latencies = append(g.Latencies, graphPoints{ts, int(g.rpcServer.stats.CallLatencies.Mean())})
	errs := atomic.LoadInt64(&g.rpcServer.cumCallErrs)
	g.Errors = append(g.Errors, graphPoints{ts, int(errs)})
	slows := atomic.LoadInt64(&g.rpcServer.cumCallSlow)
	g.Slows = append(g.Slows, graphPoints{ts, int(slows)})
	g.Calls = atomic.LoadInt64(&g.rpcServer.cumCalls)
	g.Sessions = atomic.LoadInt64(&g.rpcServer.cumSessions)

	memStats := new(runtime.MemStats)
	runtime.ReadMemStats(memStats)
	g.NumGC = append(g.NumGC, graphPoints{ts, int(memStats.NumGC)})
	g.HeapSys = append(g.HeapSys, graphPoints{ts, int(memStats.HeapSys) / (1 << 20)})
	g.HeapReleased = append(g.HeapReleased, graphPoints{ts, int(memStats.HeapReleased) / (1 << 20)})
	g.HeapAlloc = append(g.HeapAlloc, graphPoints{ts, int(memStats.HeapAlloc) / (1 << 20)})
	g.StackInUse = append(g.StackInUse, graphPoints{ts, int(memStats.StackInuse) / (1 << 20)})
	g.HeapObjects = append(g.HeapObjects, graphPoints{ts, int(memStats.HeapObjects)})

	// sort the GC pause samples (in ms) so percentiles can be taken
	gcPausesMs := make(uint64Slice, 0, len(memStats.PauseNs))
	for _, pauseNs := range memStats.PauseNs {
		if pauseNs == 0 {
			continue
		}

		pauseMs := pauseNs / uint64(time.Millisecond)
		if pauseMs == 0 {
			continue
		}

		gcPausesMs = append(gcPausesMs, pauseMs)
	}
	sort.Sort(gcPausesMs)
	g.GcPause100 = append(g.GcPause100, graphPoints{ts, int(percentile(100.0, gcPausesMs))})
	g.GcPause99 = append(g.GcPause99, graphPoints{ts, int(percentile(99.0, gcPausesMs))})
	g.GcPause95 = append(g.GcPause95, graphPoints{ts, int(percentile(95.0, gcPausesMs))})
	g.GcPause75 = append(g.GcPause75, graphPoints{ts, int(percentile(75.0, gcPausesMs))})

	g.Tpl.Execute(w, g)
}
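The GC-pause block relies on two helpers that are not shown here: uint64Slice must satisfy sort.Interface so sort.Sort can order the pause samples, and percentile must pick a value out of the sorted slice. A minimal sketch of what they could look like, assuming a nearest-rank percentile (the project's real helpers may round or interpolate differently):

// uint64Slice: a []uint64 that satisfies sort.Interface (assumed shape).
type uint64Slice []uint64

func (s uint64Slice) Len() int           { return len(s) }
func (s uint64Slice) Less(i, j int) bool { return s[i] < s[j] }
func (s uint64Slice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// percentile: nearest-rank percentile over an already-sorted slice (assumed
// semantics); percentile(100, s) yields the maximum, percentile(0, s) the minimum.
func percentile(p float64, sorted uint64Slice) uint64 {
	if len(sorted) == 0 {
		return 0
	}
	idx := int(float64(len(sorted))*p/100.0+0.5) - 1
	if idx < 0 {
		idx = 0
	}
	if idx >= len(sorted) {
		idx = len(sorted) - 1
	}
	return sorted[idx]
}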