func StartSocket() {
	if !g.Config().Socket.Enable {
		return
	}

	addr := g.Config().Socket.Listen
	tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log.Fatalf("net.ResolveTCPAddr fail: %s", err)
	}

	listener, err := net.ListenTCP("tcp", tcpAddr)
	if err != nil {
		log.Fatalf("listen %s fail: %s", addr, err)
	} else {
		log.Println("socket listening", addr)
	}
	defer listener.Close()

	for {
		conn, err := listener.Accept()
		if err != nil {
			log.Println("listener.Accept error:", err)
			continue
		}
		go socketTelnetHandle(conn)
	}
}
func StartRpc() {
	if !g.Config().Rpc.Enabled {
		return
	}

	addr := g.Config().Rpc.Listen
	tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log.Fatalf("net.ResolveTCPAddr fail: %s", err)
	}

	listener, err := net.ListenTCP("tcp", tcpAddr)
	if err != nil {
		log.Fatalf("listen %s fail: %s", addr, err)
	} else {
		log.Println("rpc listening", addr)
	}

	server := rpc.NewServer()
	server.Register(new(Transfer))

	for {
		conn, err := listener.Accept()
		if err != nil {
			log.Println("listener.Accept error:", err)
			continue
		}
		// Serve each connection with the JSON-RPC codec instead of Go's
		// native gob codec, so non-Go clients can also push data.
		// go rpc.ServeConn(conn)
		go server.ServeCodec(jsonrpc.NewServerCodec(conn))
	}
}
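For reference, a minimal client sketch against the JSON-RPC endpoint above. The listen address is illustrative, and the local MetricValue mirror (including its JSON field tags) is an assumption made for this sketch rather than something taken from this source; only the "Transfer.Update" method name is grounded in the code above and in forward2TransferTask.

package main

import (
	"log"
	"net/rpc/jsonrpc"
)

// MetricValue is a hypothetical local mirror of cmodel.MetricValue;
// the JSON field names are an assumption for illustration.
type MetricValue struct {
	Endpoint  string      `json:"endpoint"`
	Metric    string      `json:"metric"`
	Value     interface{} `json:"value"`
	Step      int64       `json:"step"`
	Type      string      `json:"counterType"`
	Tags      string      `json:"tags"`
	Timestamp int64       `json:"timestamp"`
}

// TransferResponse is likewise a hypothetical mirror of g.TransferResp.
type TransferResponse struct {
	Msg        string `json:"msg"`
	Total      int    `json:"total"`
	ErrInvalid int    `json:"errInvalid"`
	Latency    int64  `json:"latency"`
}

func main() {
	client, err := jsonrpc.Dial("tcp", "127.0.0.1:8433") // address is illustrative
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	args := []*MetricValue{{
		Endpoint:  "host-01",
		Metric:    "cpu.idle",
		Value:     93.5,
		Step:      60,
		Type:      "GAUGE",
		Timestamp: 0, // <=0 is replaced with server time by RecvMetricValues
	}}

	var resp TransferResponse
	if err := client.Call("Transfer.Update", args, &resp); err != nil {
		log.Fatal(err)
	}
	log.Printf("resp: %+v", resp)
}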
func configCommonRoutes() {
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("ok\n"))
	})

	http.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(fmt.Sprintf("%s\n", g.VERSION)))
	})

	http.HandleFunc("/workdir", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(fmt.Sprintf("%s\n", file.SelfDir())))
	})

	http.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
		RenderDataJson(w, g.Config())
	})

	http.HandleFunc("/config/reload", func(w http.ResponseWriter, r *http.Request) {
		if strings.HasPrefix(r.RemoteAddr, "127.0.0.1") {
			g.ParseConfig(g.ConfigFile)
			RenderDataJson(w, "ok")
		} else {
			RenderDataJson(w, "no privilege")
		}
	})
}
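A quick way to exercise these routes; the base address is illustrative. Note that /config/reload only succeeds for requests whose RemoteAddr starts with 127.0.0.1, i.e. when called from the local machine.

package main

import (
	"io"
	"log"
	"net/http"
	"os"
)

func main() {
	base := "http://127.0.0.1:6060" // illustrative listen address
	for _, path := range []string{"/health", "/version", "/workdir", "/config/reload"} {
		resp, err := http.Get(base + path)
		if err != nil {
			log.Fatal(err)
		}
		io.Copy(os.Stdout, resp.Body) // print each route's response body
		resp.Body.Close()
	}
}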
func socketTelnetHandle(conn net.Conn) {
	defer conn.Close()

	items := []*cmodel.MetaData{}
	buf := bufio.NewReader(conn)

	cfg := g.Config()
	timeout := time.Duration(cfg.Socket.Timeout) * time.Second

	for {
		conn.SetReadDeadline(time.Now().Add(timeout))
		line, err := buf.ReadString('\n')
		if err != nil {
			break
		}

		line = strings.Trim(line, "\n")
		if line == "quit" {
			break
		}
		if line == "" {
			continue
		}

		t := strings.Fields(line)
		if len(t) < 2 {
			continue
		}

		cmd := t[0]
		if cmd != "update" {
			continue
		}

		item, err := convertLine2MetaData(t[1:])
		if err != nil {
			continue
		}
		items = append(items, item)
	}

	// statistics
	count := int64(len(items))
	pfc.Meter("SocketRecv", count)
	pfc.Meter("Recv", count)

	if cfg.Transfer.Enabled {
		sender.Push2SendQueue(items)
	}
}
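The socket receiver speaks a simple line protocol: each data line starts with "update" followed by fields parsed by convertLine2MetaData (not shown in this source), and "quit" ends the session. A minimal client sketch; the address and the field layout after "update" are assumptions for illustration.

package main

import (
	"fmt"
	"log"
	"net"
)

func main() {
	conn, err := net.Dial("tcp", "127.0.0.1:4444") // address is illustrative
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Each line must start with "update"; the remaining fields are consumed
	// by convertLine2MetaData, whose exact layout is not shown in this source.
	fmt.Fprintln(conn, "update host-01 cpu.idle 93.5 GAUGE 60") // hypothetical field order
	fmt.Fprintln(conn, "quit")                                  // ends the session server-side
}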
func startHttpServer() {
	if !g.Config().Http.Enable {
		return
	}

	addr := g.Config().Http.Listen
	if addr == "" {
		return
	}

	configCommonRoutes()
	configProcHttpRoutes()
	configApiHttpRoutes()

	s := &http.Server{
		Addr:           addr,
		MaxHeaderBytes: 1 << 30,
	}

	log.Println("http.startHttpServer ok, listening", addr)
	log.Fatalln(s.ListenAndServe())
}
func initConnPools() {
	cfg := g.Config()

	// init transfer global configs and per-host send counters
	addrs := make([]string, 0)
	for hn, addr := range cfg.Transfer.Cluster {
		TransferHostnames = append(TransferHostnames, hn)
		addrs = append(addrs, addr)
		TransferMap[hn] = addr
		TransferSendCnt[hn] = nproc.NewSCounterQps(hn + ":" + addr)
		TransferSendFailCnt[hn] = nproc.NewSCounterQps(hn + ":" + addr)
	}

	// init conn pools
	SenderConnPools = cpool.CreateSafeRpcConnPools(cfg.Transfer.MaxConns, cfg.Transfer.MaxIdle,
		cfg.Transfer.ConnTimeout, cfg.Transfer.CallTimeout, addrs)
}
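The Cluster map this loop ranges over is keyed by transfer hostname, with "ip:port" addresses as values; a sketch of its shape (hostnames and addresses here are hypothetical):

package main

import "fmt"

func main() {
	// Illustrative shape of cfg.Transfer.Cluster: hostname -> "ip:port".
	cluster := map[string]string{
		"transfer-01": "10.0.0.1:8433", // addresses are hypothetical
		"transfer-02": "10.0.0.2:8433",
	}
	for hn, addr := range cluster {
		fmt.Println(hn, addr)
	}
}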
// process new metric values
func RecvMetricValues(args []*cmodel.MetricValue, reply *g.TransferResp, from string) error {
	start := time.Now()
	reply.ErrInvalid = 0

	items := []*cmodel.MetaData{}
	for _, v := range args {
		if v == nil {
			reply.ErrInvalid += 1
			continue
		}

		// Legacy issue: old agents reported metric=kernel.hostname with a
		// string value, which is no longer supported, so it is hard-coded
		// out here.
		if v.Metric == "kernel.hostname" {
			reply.ErrInvalid += 1
			continue
		}

		if v.Metric == "" || v.Endpoint == "" {
			reply.ErrInvalid += 1
			continue
		}

		if v.Type != g.COUNTER && v.Type != g.GAUGE && v.Type != g.DERIVE {
			reply.ErrInvalid += 1
			continue
		}

		if v.Value == "" {
			reply.ErrInvalid += 1
			continue
		}

		if v.Step <= 0 {
			reply.ErrInvalid += 1
			continue
		}

		if len(v.Metric)+len(v.Tags) > 510 {
			reply.ErrInvalid += 1
			continue
		}

		errtags, tags := cutils.SplitTagsString(v.Tags)
		if errtags != nil {
			reply.ErrInvalid += 1
			continue
		}

		// TODO: this could be handled more elegantly
		now := start.Unix()
		if v.Timestamp <= 0 || v.Timestamp > now*2 {
			v.Timestamp = now
		}

		fv := &cmodel.MetaData{
			Metric:      v.Metric,
			Endpoint:    v.Endpoint,
			Timestamp:   v.Timestamp,
			Step:        v.Step,
			CounterType: v.Type,
			Tags:        tags, // TODO: limit the number of tag key-value pairs
		}

		valid := true
		var vv float64
		var err error

		switch cv := v.Value.(type) {
		case string:
			vv, err = strconv.ParseFloat(cv, 64)
			if err != nil {
				valid = false
			}
		case float64:
			vv = cv
		case int64:
			vv = float64(cv)
		default:
			valid = false
		}

		if !valid {
			reply.ErrInvalid += 1
			continue
		}

		fv.Value = vv
		items = append(items, fv)
	}

	// statistics
	cnt := int64(len(items))
	pfc.Meter("Recv", cnt)
	if from == "rpc" {
		pfc.Meter("RpcRecv", cnt)
	} else if from == "http" {
		pfc.Meter("HttpRecv", cnt)
	}

	cfg := g.Config()
	if cfg.Transfer.Enabled {
		sender.Push2SendQueue(items)
	}

	reply.Msg = "ok"
	reply.Total = len(args)
	reply.Latency = (time.Now().UnixNano() - start.UnixNano()) / 1000000
	return nil
}
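The value coercion above accepts strings, float64, and int64, normalizing everything to float64 and rejecting other types. Distilled into a standalone helper for clarity; this is a sketch for illustration, not part of the original source.

package main

import (
	"fmt"
	"strconv"
)

// toFloat64 mirrors the type switch in RecvMetricValues: strings are parsed,
// numeric types are widened, and anything else is rejected.
func toFloat64(v interface{}) (float64, bool) {
	switch cv := v.(type) {
	case string:
		f, err := strconv.ParseFloat(cv, 64)
		return f, err == nil
	case float64:
		return cv, true
	case int64:
		return float64(cv), true
	default:
		return 0, false
	}
}

func main() {
	for _, v := range []interface{}{"93.5", float64(1.5), int64(7), true} {
		f, ok := toFloat64(v)
		fmt.Println(f, ok) // the bool case fails validation, as in the server
	}
}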
func forward2TransferTask(Q *nlist.SafeListLimited, concurrent int32) {
	cfg := g.Config()
	batch := int(cfg.Transfer.Batch)
	maxConns := int64(cfg.Transfer.MaxConns)
	retry := int(cfg.Transfer.Retry)
	if retry < 1 {
		retry = 1
	}

	sema := nsema.NewSemaphore(int(concurrent))
	transNum := len(TransferHostnames)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			time.Sleep(time.Millisecond * 50)
			continue
		}

		transItems := make([]*cmodel.MetricValue, count)
		for i := 0; i < count; i++ {
			transItems[i] = convert(items[i].(*cmodel.MetaData))
		}

		sema.Acquire()
		go func(transItems []*cmodel.MetricValue, count int) {
			defer sema.Release()
			var err error

			// Walk the transfer list starting at a random offset until the
			// data is sent successfully or the list is exhausted; the random
			// start spreads load away from slow transfers.
			resp := &g.TransferResp{}
			sendOk := false
			for j := 0; j < retry && !sendOk; j++ {
				rint := rand.Int()
				for i := 0; i < transNum && !sendOk; i++ {
					idx := (i + rint) % transNum
					host := TransferHostnames[idx]
					addr := TransferMap[host]

					// Skip hosts that are slow to establish connections;
					// otherwise they would severely throttle the send rate.
					cc := pfc.GetCounterCount(host)
					if cc >= maxConns {
						continue
					}

					pfc.Counter(host, 1)
					err = SenderConnPools.Call(addr, "Transfer.Update", transItems, resp)
					pfc.Counter(host, -1)

					if err == nil {
						sendOk = true
						// statistics
						TransferSendCnt[host].IncrBy(int64(count))
					} else {
						// statistics
						TransferSendFailCnt[host].IncrBy(int64(count))
					}
				}
			}

			// statistics
			if !sendOk {
				if cfg.Debug {
					log.Printf("send to transfer fail, connpool:%v", SenderConnPools.Proc())
				}
				pfc.Meter("SendFail", int64(count))
			} else {
				pfc.Meter("Send", int64(count))
			}
		}(transItems, count)
	}
}
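The retry loop above starts each pass at a random index so that concurrent senders do not all hammer the same first host. The selection pattern in isolation, with a stub send function standing in for the real RPC call; this sketch is illustrative only.

package main

import (
	"fmt"
	"math/rand"
)

// sendWithRandomOffset tries each host once, beginning at a random position
// and wrapping around, and returns true on the first successful send.
func sendWithRandomOffset(hosts []string, send func(host string) error) bool {
	n := len(hosts)
	rint := rand.Int()
	for i := 0; i < n; i++ {
		host := hosts[(i+rint)%n] // random starting point, then round-robin
		if send(host) == nil {
			return true
		}
	}
	return false
}

func main() {
	hosts := []string{"t1:8433", "t2:8433", "t3:8433"} // hypothetical hosts
	ok := sendWithRandomOffset(hosts, func(host string) error {
		fmt.Println("trying", host)
		return nil // stub: pretend the first attempt succeeds
	})
	fmt.Println("sent:", ok)
}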
func startSendTasks() {
	cfg := g.Config()
	concurrent := cfg.Transfer.MaxConns * int32(len(cfg.Transfer.Cluster))
	go forward2TransferTask(SenderQueue, concurrent)
}
func initConnPools() {
	cfg := g.Config()
	SenderConnPools = cpool.CreateSafeRpcConnPools(cfg.Transfer.MaxConns, cfg.Transfer.MaxIdle,
		cfg.Transfer.ConnTimeout, cfg.Transfer.CallTimeout, []string{cfg.Transfer.Addr})
}