func (this *Start) shutdown() {
	// kill haproxy
	log.Info("killing haproxy processes")

	f, e := os.Open(haproxyPidFile)
	if e != nil {
		log.Error("shutdown %v", e)
		return
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	for {
		l, e := gio.ReadLine(reader)
		if e != nil {
			// EOF
			break
		}

		pid, _ := strconv.Atoi(string(l))
		p := &os.Process{
			Pid: pid,
		}
		if err := p.Kill(); err != nil {
			log.Error(err)
		} else {
			log.Info("haproxy[%d] terminated", pid)
		}
	}

	log.Info("removing %s", haproxyPidFile)
	os.Remove(haproxyPidFile)
}
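// Aside, not part of the original shutdown path: constructing
// os.Process{Pid: pid} by hand works on Unix, but os.FindProcess is the
// portable way to obtain a handle, and SIGTERM gives haproxy a chance to
// exit cleanly before a hard Kill. A minimal sketch:
func terminatePid(pid int) error {
	p, err := os.FindProcess(pid) // on Unix this always succeeds
	if err != nil {
		return err
	}
	return p.Signal(syscall.SIGTERM) // graceful; callers may fall back to p.Kill()
}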
func (this *Mirror) Main() (exitCode int) {
	this.quit = make(chan struct{})
	signal.RegisterHandler(func(sig os.Signal) {
		log.Info("received signal: %s", strings.ToUpper(sig.String()))
		log.Info("quitting...")

		this.once.Do(func() {
			close(this.quit)
		})
	}, syscall.SIGINT, syscall.SIGTERM)

	// BandwidthLimit is presumably in megabits/s; convert to bytes/s,
	// and size the bucket to hold 10s worth of traffic
	limit := (1 << 20) * this.BandwidthLimit / 8
	if this.BandwidthLimit > 0 {
		this.bandwidthRateLimiter = ratelimiter.NewLeakyBucket(limit*10, time.Second*10)
	}

	log.Info("starting mirror@%s", gafka.BuildId)

	// pprof
	debugAddr := ":10009"
	go http.ListenAndServe(debugAddr, nil)
	log.Info("pprof ready on %s", debugAddr)

	z1 := zk.NewZkZone(zk.DefaultConfig(this.Z1, ctx.ZoneZkAddrs(this.Z1)))
	z2 := zk.NewZkZone(zk.DefaultConfig(this.Z2, ctx.ZoneZkAddrs(this.Z2)))
	c1 := z1.NewCluster(this.C1)
	c2 := z2.NewCluster(this.C2)

	this.runMirror(c1, c2, limit)

	log.Info("bye mirror@%s, %s", gafka.BuildId, time.Since(this.startedAt))
	log.Close()

	return
}
func (this *mysqlStore) Start() error {
	if err := this.refreshFromMysql(); err != nil {
		// refuse to start if mysql conn fails
		return err
	}

	// TODO watch KatewayMysqlDsn znode
	go func() {
		ticker := time.NewTicker(this.cf.Refresh)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				this.refreshFromMysql()
				log.Info("manager refreshed from mysql")

			case <-this.refreshCh:
				this.refreshFromMysql()
				log.Info("manager forced to refresh from mysql")

			case <-this.shutdownCh:
				log.Info("mysql manager stopped")
				return
			}
		}
	}()

	return nil
}
func (this *Monitor) Start() {
	this.leader = true
	this.leadAt = time.Now()
	this.stop = make(chan struct{})

	go func() {
		log.Info("telemetry started: %s", telemetry.Default.Name())

		if err := telemetry.Default.Start(); err != nil {
			log.Error("telemetry: %v", err)
		}
	}()

	this.inflight = new(sync.WaitGroup)
	this.watchers = this.watchers[:0]
	for name, watcherFactory := range registeredWatchers {
		watcher := watcherFactory()
		this.watchers = append(this.watchers, watcher)

		watcher.Init(this)

		log.Info("created and starting watcher: %s", name)

		this.inflight.Add(1)
		go watcher.Run()
	}

	log.Info("all watchers ready!")

	<-this.stop
	this.inflight.Wait()

	log.Info("all watchers stopped")
}
func (this *Start) reloadHAproxy() (err error) {
	var cmd *exec.Cmd
	waitStartCh := make(chan struct{})
	if this.starting {
		log.Info("haproxy starting")
		cmd = exec.Command(this.command, "-f", configFile) // TODO use absolute path
		this.starting = false

		go func() {
			<-waitStartCh
			log.Info("haproxy started")
			if err := cmd.Wait(); err != nil {
				log.Error("haproxy: %v", err)
			}
		}()
	} else {
		// -sf: the new haproxy process asks the old pids to finish
		// serving their connections and then exit, i.e. a graceful reload
		shellScript := fmt.Sprintf("%s -f %s/%s -sf `cat %s/%s`",
			this.command, this.root, configFile, this.root, haproxyPidFile)
		log.Info("haproxy reloading: %s", shellScript)
		cmd = exec.Command("/bin/sh", "-c", shellScript)

		go func() {
			<-waitStartCh
			log.Info("haproxy reloaded")
			if err := cmd.Wait(); err != nil {
				log.Error("haproxy: %v", err)
			}
		}()
	}

	if err = cmd.Start(); err == nil {
		waitStartCh <- struct{}{}
	}

	return err
}
func (this *mysqlStore) Start() error {
	if err := this.refreshFromMysql(); err != nil {
		// refuse to start if mysql conn fails
		return fmt.Errorf("manager[%s]: %v", this.Name(), err)
	}

	go func() {
		ticker := time.NewTicker(this.cf.Refresh)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				if err := this.refreshFromMysql(); err != nil {
					log.Error(err.Error())
				} else {
					log.Info("manager refreshed from mysql")
				}

			case <-this.refreshCh:
				if err := this.refreshFromMysql(); err != nil {
					log.Error(err.Error())
				} else {
					log.Info("manager forced to refresh from mysql")
				}

			case <-this.shutdownCh:
				log.Info("mysql manager stopped")
				return
			}
		}
	}()

	return nil
}
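// Hypothetical helper, not in the original store: a non-blocking way for
// callers to trigger the forced-refresh branch above, assuming refreshCh
// is a buffered chan struct{} owned by mysqlStore.
func (this *mysqlStore) ForceRefresh() {
	select {
	case this.refreshCh <- struct{}{}:
	default: // a refresh is already pending; don't block the caller
	}
}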
func (this *Partition) addPartition(zkAddrs string, topic string, partitions int) error {
	log.Info("adding partitions to topic: %s", topic)

	cmd := pipestream.New(fmt.Sprintf("%s/bin/kafka-topics.sh", ctx.KafkaHome()),
		fmt.Sprintf("--zookeeper %s", zkAddrs),
		"--alter",
		fmt.Sprintf("--topic %s", topic),
		fmt.Sprintf("--partitions %d", partitions),
	)
	err := cmd.Open()
	if err != nil {
		return err
	}
	defer cmd.Close() // also released on the scanner error path below

	scanner := bufio.NewScanner(cmd.Reader())
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		this.Ui.Output(color.Yellow(scanner.Text()))
	}
	if err = scanner.Err(); err != nil {
		return err
	}

	log.Info("added partitions to topic: %s", topic)

	return nil
}
func loadTemplates() {
	if config.faeTemplateFile != "" {
		body, err := ioutil.ReadFile(config.faeTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.faeTemplateFile, err)
		} else {
			faeTemplateContents = string(body)
			log.Info("template[%s] loaded", config.faeTemplateFile)
		}
	}

	if config.actorTemplateFile != "" {
		body, err := ioutil.ReadFile(config.actorTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.actorTemplateFile, err)
		} else {
			// was maintainTemplateContents: an apparent copy-paste slip that
			// let the maintain branch below overwrite the actor template
			actorTemplateContents = string(body)
			log.Info("template[%s] loaded", config.actorTemplateFile)
		}
	}

	if config.maintainTemplateFile != "" {
		body, err := ioutil.ReadFile(config.maintainTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.maintainTemplateFile, err)
		} else {
			maintainTemplateContents = string(body)
			log.Info("template[%s] loaded", config.maintainTemplateFile)
		}
	}
}
func shutdown() {
	log.Info("unregistering etcd")
	engineRunner.UnregisterEtcd()

	cleanup()

	log.Info("Terminated")
	os.Exit(0)
}
func (this *Start) main() {
	ctx.LoadFromHome()

	this.zkzone = zk.NewZkZone(zk.DefaultConfig(this.zone, ctx.ZoneZkAddrs(this.zone)))
	zkConnEvt, ok := this.zkzone.SessionEvents()
	if !ok {
		panic("someone stealing my events")
	}

	registry.Default = zkr.New(this.zkzone)

	log.Info("ehaproxy[%s] starting...", gafka.BuildId)

	go this.runMonitorServer(this.httpAddr)

	zkConnected := false
	for {
		instances, instancesChange, err := registry.Default.WatchInstances()
		if err != nil {
			log.Error("zone[%s] %s", this.zkzone.Name(), err)
			time.Sleep(time.Second)
			continue
		}

		if zkConnected {
			if len(instances) > 0 {
				this.reload(instances)
			} else {
				// resilience to zk problem by local cache
				log.Warn("backend all shutdown? skip this change")
				time.Sleep(time.Second)
				continue
			}
		}

		select {
		case <-this.quitCh:
			return

		case evt := <-zkConnEvt:
			if evt.State == zklib.StateHasSession && !zkConnected {
				log.Info("zk connected")
				zkConnected = true
			} else if zkConnected {
				log.Warn("zk jitter: %+v", evt)
			}

		case <-instancesChange:
			log.Info("instances changed!!")
		}
	}
}
func (this *Engine) StopRpcServe() {
	log.Info("RPC server stopping...")

	rpcServer := this.rpcServer.(*TFunServer)
	rpcServer.Stop()

	close(this.stopChan)

	outstandingSessions := atomic.LoadInt64(&rpcServer.activeSessionN)
	log.Warn("RPC outstanding sessions: %d", outstandingSessions)

	this.svt.Flush()

	log.Info("RPC server stopped gracefully")
}
func (this *controller) RunForever() (err error) {
	log.Info("controller[%s] starting", this.Id())

	if err = this.orchestrator.RegisterActor(this.Id(), this.Bytes()); err != nil {
		return err
	}
	defer this.orchestrator.ResignActor(this.Id())

	if err = manager.Default.Start(); err != nil {
		return
	}
	log.Trace("manager[%s] started", manager.Default.Name())

	go this.runWebServer()

	jobDispatchQuit := make(chan struct{})
	go this.dispatchJobQueues(jobDispatchQuit)

	webhookDispatchQuit := make(chan struct{})
	go this.dispatchWebhooks(webhookDispatchQuit)

	select {
	case <-jobDispatchQuit:
		log.Warn("dispatchJobQueues quit")

	case <-webhookDispatchQuit:
		log.Warn("dispatchWebhooks quit")
	}

	manager.Default.Stop()
	log.Trace("manager[%s] stopped", manager.Default.Name())

	return
}
func (this *Peer) discoverPeers() {
	defer func() {
		this.c.Close() // leave the multicast group
	}()

	var msg peerMessage
	reader := bufio.NewReader(this.c)
	for {
		// net.ListenMulticastUDP sets IP_MULTICAST_LOOP=0 as default,
		// so you never receive your own sent data if you run both
		// sender and receiver on (logically) the same IP host
		line, _, err := reader.ReadLine()
		if err != nil {
			log.Error(err)
			continue
		}

		if err := msg.unmarshal(line); err != nil {
			// not our protocol; it may be SSDP or else
			continue
		}

		log.Debug("received peer: %+v", msg)

		neighborIp, present := msg["ip"]
		if !present {
			log.Info("peer msg has no 'ip'")
			continue
		}

		this.refreshNeighbor(neighborIp.(string))
	}
}
func (this *WatchActord) Run() {
	defer this.Wg.Done()

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	jobQueues := metrics.NewRegisteredGauge("actord.jobqueues", nil)
	actors := metrics.NewRegisteredGauge("actord.actors", nil)
	orphan := metrics.NewRegisteredGauge("actord.orphan", nil)
	backlog := metrics.NewRegisteredGauge("actord.backlog.30s", nil)
	archive := metrics.NewRegisteredGauge("actord.archive.30s", nil)
	webhooks := metrics.NewRegisteredGauge("actord.webhooks", nil)

	for {
		select {
		case <-this.Stop:
			log.Info("%s stopped", this.ident())
			return

		case now := <-ticker.C:
			jLen, aLen, orphanN, backlogN, archiveN := this.watchJobs(now)
			jobQueues.Update(jLen)
			actors.Update(aLen)
			orphan.Update(orphanN)
			backlog.Update(backlogN)
			archive.Update(archiveN)

			this.watchWebhooks(webhooks, now)
		}
	}
}
// TODO from live meta or zk?
func (this *pubPool) RefreshBrokerList(brokerList []string) {
	if len(brokerList) == 0 {
		if len(this.brokerList) > 0 {
			log.Warn("%s meta store found empty broker list, refresh refused", this.cluster)
		}
		return
	}

	setOld, setNew := set.NewSet(), set.NewSet()
	for _, b := range this.brokerList {
		setOld.Add(b)
	}
	for _, b := range brokerList {
		setNew.Add(b)
	}

	if !setOld.Equal(setNew) {
		log.Info("%s broker list from %+v to %+v", this.cluster, this.brokerList, brokerList)

		// rebuild the kafka conn pool
		this.brokerList = brokerList
		this.Close()
		this.buildPools()
	}
}
func (this *WatchZk) Run() {
	defer this.Wg.Done()

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	qps := metrics.NewRegisteredGauge("zk.qps", nil)
	conns := metrics.NewRegisteredGauge("zk.conns", nil)
	znodes := metrics.NewRegisteredGauge("zk.znodes", nil)
	deadNodes := metrics.NewRegisteredGauge("zk.dead", nil)
	for {
		select {
		case <-this.Stop:
			log.Info("zk.zk stopped")
			return

		case <-ticker.C:
			r, c, z, d := this.collectMetrics()
			if this.lastReceived > 0 {
				// turn the cumulative received counter into a per-second rate
				qps.Update((r - this.lastReceived) / int64(this.Tick.Seconds()))
			}
			this.lastReceived = r
			conns.Update(c)
			znodes.Update(z)
			deadNodes.Update(d)
		}
	}
}
func shutdown() {
	cleanup()

	log.Info("Terminated")
	os.Exit(0)
}
func (this *WatchSub) Run() {
	defer this.Wg.Done()

	this.zkclusters = this.Zkzone.PublicClusters() // TODO sync with clusters change

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	subLagGroups := metrics.NewRegisteredGauge("sub.lags", nil)
	subConflictGroup := metrics.NewRegisteredGauge("sub.conflict", nil)
	for {
		select {
		case <-this.Stop:
			log.Info("kateway.sub stopped")
			return

		case <-ticker.C:
			//lags := this.subLags() // DISABLED
			subLagGroups.Update(int64(0))

			conflictGroups := this.subConflicts()
			subConflictGroup.Update(int64(conflictGroups))
		}
	}
}
// @rest GET /v1/status
func (this *manServer) statusHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	log.Info("status %s(%s)", r.RemoteAddr, getHttpRemoteIp(r))

	output := make(map[string]interface{})
	output["options"] = Options
	output["loglevel"] = logLevel.String()
	output["manager"] = manager.Default.Dump()

	pubConns := int(atomic.LoadInt32(&this.gw.pubServer.activeConnN))
	subConns := int(atomic.LoadInt32(&this.gw.subServer.activeConnN))
	output["pubconn"] = strconv.Itoa(pubConns)
	output["subconn"] = strconv.Itoa(subConns)
	output["hh_appends"] = strconv.FormatInt(hh.Default.AppendN(), 10)
	output["hh_delivers"] = strconv.FormatInt(hh.Default.DeliverN(), 10)
	output["goroutines"] = strconv.Itoa(runtime.NumGoroutine())

	var mem runtime.MemStats
	runtime.ReadMemStats(&mem)
	output["heap"] = gofmt.ByteSize(mem.HeapSys).String()
	output["objects"] = gofmt.Comma(int64(mem.HeapObjects))

	b, err := json.MarshalIndent(output, "", " ")
	if err != nil {
		log.Error("%s(%s) %v", r.RemoteAddr, getHttpRemoteIp(r), err)
		return // don't write a nil body as an implicit 200
	}

	w.Write(b)
}
// @rest POST /v1/jobs/:appid/:topic/:ver
func (this *manServer) createJobHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	topic := params.ByName(UrlParamTopic)
	if !manager.Default.ValidateTopicName(topic) {
		log.Warn("illegal topic: %s", topic)

		writeBadRequest(w, "illegal topic")
		return
	}

	realIp := getHttpRemoteIp(r)
	if !this.throttleAddTopic.Pour(realIp, 1) {
		writeQuotaExceeded(w)
		return
	}

	hisAppid := params.ByName(UrlParamAppid)
	appid := r.Header.Get(HttpHeaderAppid)
	pubkey := r.Header.Get(HttpHeaderPubkey)
	ver := params.ByName(UrlParamVersion)
	if !manager.Default.AuthAdmin(appid, pubkey) {
		log.Warn("suspicious create job %s(%s) {appid:%s pubkey:%s topic:%s ver:%s}",
			r.RemoteAddr, realIp, appid, pubkey, topic, ver)

		writeAuthFailure(w, manager.ErrAuthenticationFail)
		return
	}

	cluster, found := manager.Default.LookupCluster(hisAppid)
	if !found {
		log.Error("create job %s(%s) {appid:%s topic:%s ver:%s} invalid appid",
			r.RemoteAddr, realIp, hisAppid, topic, ver)

		writeBadRequest(w, "invalid appid")
		return
	}

	log.Info("create job[%s] %s(%s) {appid:%s topic:%s ver:%s}",
		appid, r.RemoteAddr, realIp, hisAppid, topic, ver)

	rawTopic := manager.Default.KafkaTopic(hisAppid, topic, ver)
	if err := job.Default.CreateJobQueue(Options.AssignJobShardId, hisAppid, rawTopic); err != nil {
		log.Error("create job[%s] %s(%s) {shard:%d appid:%s topic:%s ver:%s} %v",
			appid, r.RemoteAddr, realIp, Options.AssignJobShardId, hisAppid, topic, ver, err)

		writeServerError(w, err.Error())
		return
	}

	if err := this.gw.zkzone.CreateJobQueue(rawTopic, cluster); err != nil {
		log.Error("app[%s] %s(%s) create job: {shard:%d appid:%s topic:%s ver:%s} %v",
			appid, r.RemoteAddr, realIp, Options.AssignJobShardId, hisAppid, topic, ver, err)

		writeServerError(w, err.Error())
		return
	}

	w.WriteHeader(http.StatusCreated)
	w.Write(ResponseOk)
}
//go:generate goannotation $GOFILE
// @rest GET /v1/schema/:appid/:topic/:ver
func (this *manServer) schemaHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	hisAppid := params.ByName(UrlParamAppid)
	myAppid := r.Header.Get(HttpHeaderAppid)
	topic := params.ByName(UrlParamTopic)
	ver := params.ByName(UrlParamVersion)
	realIp := getHttpRemoteIp(r)

	log.Info("schema[%s] %s(%s) {app:%s topic:%s ver:%s UA:%s}",
		myAppid, r.RemoteAddr, realIp, hisAppid, topic, ver, r.Header.Get("User-Agent"))

	// TODO authorization
	_, found := manager.Default.LookupCluster(hisAppid)
	if !found {
		writeBadRequest(w, "invalid appid")
		return
	}

	// TODO lookup from manager and send response
	schema, err := manager.Default.TopicSchema(hisAppid, topic, ver)
	if err != nil {
		writeBadRequest(w, err.Error())
		return
	}

	w.Write([]byte(strings.TrimSpace(schema)))
}
func (this *WatchAppError) Run() {
	defer this.Wg.Done()

	return // disable for now TODO

	appError := metrics.NewRegisteredCounter("kateway.apperr", nil)
	msgChan := make(chan *sarama.ConsumerMessage, 2000)

	if err := this.consumeAppErrLogs(msgChan); err != nil {
		close(msgChan)
		log.Error("%v", err)
		return
	}

	for {
		select {
		case <-this.Stop:
			log.Info("kateway.apperr stopped")
			return

		case msg, ok := <-msgChan:
			if !ok {
				return
			}

			appError.Inc(1)
			log.Warn("%d/%d %s", msg.Partition, msg.Offset, string(msg.Value))
		}
	}
}
func (this *Gateway) stop() {
	this.shutdownOnce.Do(func() {
		log.Info("stopping gateway...")

		close(this.shutdownCh)
	})
}
func (this *Engine) stopHttpServ() {
	if this.httpListener != nil {
		this.httpListener.Close()
		log.Info("HTTP server stopped")
	}
}
// TODO auth
func (this *Start) statusHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json; charset=utf8")
	w.Header().Set("Server", "ehaproxy")

	log.Info("%s status", r.RemoteAddr)

	var (
		wg sync.WaitGroup
		mu sync.Mutex

		aggStats = make(map[string]map[string]int64)
	)
	for i := 0; i < ctx.NumCPU(); i++ {
		wg.Add(1)

		go func(i int) {
			defer wg.Done()

			uri := fmt.Sprintf("http://127.0.0.1:%d/stats?stats;csv", i+dashboardPortHead)
			stats := fetchDashboardStats(uri)

			mu.Lock()
			for name, colVals := range stats {
				if _, present := aggStats[name]; !present {
					aggStats[name] = make(map[string]int64)
				}
				for k, v := range colVals {
					aggStats[name][k] += v
				}
			}
			mu.Unlock()
		}(i)
	}
	wg.Wait()

	b, _ := json.Marshal(aggStats)
	w.Write(b)
}
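// Hypothetical sketch of the fetchDashboardStats helper used above; the
// real implementation isn't shown here. It assumes haproxy's standard CSV
// stats format ("# pxname,svname,qcur,..."): every numeric column is kept,
// keyed by proxy/server name, so the caller can sum across listeners.
func fetchDashboardStats(uri string) map[string]map[string]int64 {
	stats := make(map[string]map[string]int64)

	resp, err := http.Get(uri)
	if err != nil {
		log.Error("%s: %v", uri, err)
		return stats
	}
	defer resp.Body.Close()

	records, err := csv.NewReader(resp.Body).ReadAll()
	if err != nil || len(records) < 2 {
		return stats
	}

	header := records[0] // haproxy prefixes the first column name with "# "
	for _, row := range records[1:] {
		if len(row) < 2 {
			continue
		}

		name := row[0] + "/" + row[1] // e.g. "pub/BACKEND"
		if _, present := stats[name]; !present {
			stats[name] = make(map[string]int64)
		}
		for i := 2; i < len(row) && i < len(header); i++ {
			if v, err := strconv.ParseInt(row[i], 10, 64); err == nil {
				stats[name][header[i]] = v
			}
		}
	}

	return stats
}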
func (this *WatchPubsub) Run() {
	defer this.Wg.Done()

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	this.startedAt = time.Now()

	pubsubHealth := metrics.NewRegisteredGauge("kateway.pubsub.fail", nil)
	this.pubLatency = metrics.NewRegisteredHistogram("kateway.pubsub.latency.pub", nil,
		metrics.NewExpDecaySample(1028, 0.015))
	this.subLatency = metrics.NewRegisteredHistogram("kateway.pubsub.latency.sub", nil,
		metrics.NewExpDecaySample(1028, 0.015))

	for {
		select {
		case <-this.Stop:
			log.Info("kateway.pubsub stopped")
			return

		case <-ticker.C:
			if err := this.runCheckup(); err != nil {
				pubsubHealth.Update(1)
			} else {
				pubsubHealth.Update(0)
			}
		}
	}
}
// CallSOS sends an SOS message to the zone-wide kguard leader.
func (this *ZkZone) CallSOS(caller string, msg string) {
	log.Critical("SOS[%s] %s: sending...", caller, msg)

	// kguard leader might float, so refresh on each SOS message
	kguards, err := this.KguardInfos()
	if err != nil {
		log.Error("SOS[%s] %s: %v", caller, msg, err)
		return
	}

	leader := kguards[0]
	request := gorequest.New().Timeout(time.Second * 10)
	res, body, errs := request.Post(fmt.Sprintf("http://%s:%d", leader.Host, telemetry.SOSPort)).
		Set("User-Agent", fmt.Sprintf("sos-go-%s", gafka.BuildId)).
		Set(telemetry.SOSIdentHeader, caller).
		End()
	if len(errs) > 0 {
		log.Error("SOS[%s] %s: %+v", caller, msg, errs)
		return
	}

	if res.StatusCode != http.StatusAccepted {
		log.Error("SOS[%s] %s: HTTP %s %s", caller, msg, http.StatusText(res.StatusCode), body)
		return
	}

	log.Info("SOS[%s] %s: sent ok", caller, msg)
}
func (this *WatchLoadAvg) Run() {
	defer this.Wg.Done()

	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	loadHigh := metrics.NewRegisteredGauge("zone.highload", nil)
	for {
		select {
		case <-this.Stop:
			log.Info("zone.load stopped")
			return

		case <-ticker.C:
			n, err := this.highLoadCount()
			if err != nil {
				log.Error("%v", err)
			} else {
				loadHigh.Update(n)
			}
		}
	}
}
func (this *WatchExec) Run() {
	defer this.Wg.Done()

	if this.confDir == "" {
		log.Warn("empty confd, external.exec disabled")
		return
	}

	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	if err := this.watchConfigDir(); err != nil {
		log.Error("%v", err)
		return
	}

	for {
		select {
		case <-this.Stop:
			log.Info("external.exec stopped")
			return

		case <-ticker.C:
		}
	}
}
func (this *Engine) LoadConfigFile() *Engine {
	log.Info("Engine[%s] loading config file %s", BuildID, this.configFile)

	cf := new(engineConfig)
	var err error
	cf.Conf, err = conf.Load(this.configFile)
	if err != nil {
		panic(err)
	}

	this.conf = cf
	this.doLoadConfig()

	// RegisterHttpApi is ready
	this.setupHttpServ()

	// once config is loaded, create the servants
	svr := servant.NewFunServant(config.Servants)
	this.rpcProcessor = rpc.NewFunServantProcessor(svr)
	svr.Start()

	this.peer = peer.NewPeer(this.conf.peerGroupAddr, this.conf.peerHeartbeatInterval,
		this.conf.peerDeadThreshold)

	return this
}