func Start() { if !g.Config().Rpc.Enabled { return } addr := g.Config().Rpc.Listen tcpAddr, err := net.ResolveTCPAddr("tcp", addr) if err != nil { log.Fatalf("net.ResolveTCPAddr fail: %s", err) } listener, err := net.ListenTCP("tcp", tcpAddr) if err != nil { log.Fatalf("listen %s fail: %s", addr, err) } else { log.Println("rpc listening", addr) } rpc.Register(new(Judge)) for { conn, err := listener.Accept() if err != nil { log.Printf("listener.Accept occur error: %s", err) continue } go rpc.ServeConn(conn) } }
func Start() { if !g.Config().Http.Enabled { return } addr := g.Config().Http.Listen if addr == "" { return } s := &http.Server{ Addr: addr, MaxHeaderBytes: 1 << 30, } log.Println("http listening", addr) log.Fatalln(s.ListenAndServe()) }
func SyncStrategies() { duration := time.Duration(g.Config().Hbs.Interval) * time.Second for { syncStrategies() syncExpression() time.Sleep(duration) } }
func configInfoRoutes() { // e.g. /strategy/lg-dinp-docker01.bj/cpu.idle http.HandleFunc("/strategy/", func(w http.ResponseWriter, r *http.Request) { urlParam := r.URL.Path[len("/strategy/"):] m := g.StrategyMap.Get() RenderDataJson(w, m[urlParam]) }) // e.g. /expression/net.port.listen/port=22 http.HandleFunc("/expression/", func(w http.ResponseWriter, r *http.Request) { urlParam := r.URL.Path[len("/expression/"):] m := g.ExpressionMap.Get() RenderDataJson(w, m[urlParam]) }) http.HandleFunc("/count", func(w http.ResponseWriter, r *http.Request) { sum := 0 arr := []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"} for i := 0; i < 16; i++ { for j := 0; j < 16; j++ { sum += store.HistoryBigMap[arr[i]+arr[j]].Len() } } out := fmt.Sprintf("total: %d\n", sum) w.Write([]byte(out)) }) http.HandleFunc("/history/", func(w http.ResponseWriter, r *http.Request) { urlParam := r.URL.Path[len("/history/"):] pk := utils.Md5(urlParam) L, exists := store.HistoryBigMap[pk[0:2]].Get(pk) if !exists || L.Len() == 0 { w.Write([]byte("not found\n")) return } arr := []string{} datas, _ := L.HistoryData(g.Config().Remain - 1) for i := 0; i < len(datas); i++ { if datas[i] == nil { continue } str := fmt.Sprintf( "%d %s %v\n", datas[i].Timestamp, utils.UnixTsFormat(datas[i].Timestamp), datas[i].Value, ) arr = append(arr, str) } w.Write([]byte(strings.Join(arr, ""))) }) }
func (this *Judge) Send(items []*model.JudgeItem, resp *model.SimpleRpcResponse) error { remain := g.Config().Remain // 把当前时间的计算放在最外层,是为了减少获取时间时的系统调用开销 now := time.Now().Unix() for _, item := range items { pk := item.PrimaryKey() store.HistoryBigMap[pk[0:2]].PushFrontAndMaintain(pk, item, remain, now) } return nil }
func rebuildStrategyMap(strategiesResponse *model.StrategiesResponse) { // endpoint:metric => [strategy1, strategy2 ...] m := make(map[string][]model.Strategy) for _, hs := range strategiesResponse.HostStrategies { hostname := hs.Hostname if g.Config().Debug && hostname == g.Config().DebugHost { log.Println(hostname, "strategies:") bs, _ := json.Marshal(hs.Strategies) fmt.Println(string(bs)) } for _, strategy := range hs.Strategies { key := fmt.Sprintf("%s/%s", hostname, strategy.Metric) if _, exists := m[key]; exists { m[key] = append(m[key], strategy) } else { m[key] = []model.Strategy{strategy} } } } g.StrategyMap.ReInit(m) }
func sendEvent(event *model.Event) { // update last event g.LastEvents.Set(event.Id, event) bs, err := json.Marshal(event) if err != nil { log.Printf("json marshal event %v fail: %v", event, err) return } // send to redis redisKey := fmt.Sprintf(g.Config().Alarm.QueuePattern, event.Priority()) rc := g.RedisConnPool.Get() defer rc.Close() rc.Do("LPUSH", redisKey, string(bs)) }
func sendEventIfNeed(historyData []*model.HistoryData, isTriggered bool, now int64, event *model.Event, maxStep int) { lastEvent, exists := g.LastEvents.Get(event.Id) if isTriggered { event.Status = "PROBLEM" if !exists || lastEvent.Status[0] == 'O' { // 本次触发了阈值,之前又没报过警,得产生一个报警Event event.CurrentStep = 1 // 但是有些用户把最大报警次数配置成了0,相当于屏蔽了,要检查一下 if maxStep == 0 { return } sendEvent(event) return } // 逻辑走到这里,说明之前Event是PROBLEM状态 if lastEvent.CurrentStep >= maxStep { // 报警次数已经足够多,到达了最多报警次数了,不再报警 return } if historyData[len(historyData)-1].Timestamp <= lastEvent.EventTime { // 产生过报警的点,就不能再使用来判断了,否则容易出现一分钟报一次的情况 // 只需要拿最后一个historyData来做判断即可,因为它的时间最老 return } if now-lastEvent.EventTime < g.Config().Alarm.MinInterval { // 报警不能太频繁,两次报警之间至少要间隔MinInterval秒,否则就不能报警 return } event.CurrentStep = lastEvent.CurrentStep + 1 sendEvent(event) } else { // 如果LastEvent是Problem,报OK,否则啥都不做 if exists && lastEvent.Status[0] == 'P' { event.Status = "OK" event.CurrentStep = 1 sendEvent(event) } } }
func configCommonRoutes() { http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte("ok")) }) http.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(g.VERSION)) }) http.HandleFunc("/workdir", func(w http.ResponseWriter, r *http.Request) { RenderDataJson(w, file.SelfDir()) }) http.HandleFunc("/config/reload", func(w http.ResponseWriter, r *http.Request) { if strings.HasPrefix(r.RemoteAddr, "127.0.0.1") { g.ParseConfig(g.ConfigFile) RenderDataJson(w, g.Config()) } else { w.Write([]byte("no privilege")) } }) }