Example #1
func StartSocket() {
	if !g.Config().Socket.Enabled {
		return
	}

	addr := g.Config().Socket.Listen
	tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log.Fatalf("net.ResolveTCPAddr fail: %s", err)
	}

	listener, err := net.ListenTCP("tcp", tcpAddr)
	if err != nil {
		log.Fatalf("listen %s fail: %s", addr, err)
	}
	log.Println("socket listening", addr)

	defer listener.Close()

	for {
		conn, err := listener.Accept()
		if err != nil {
			log.Println("listener.Accept occur error:", err)
			continue
		}

		go socketTelnetHandle(conn)
	}
}
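A minimal sketch of how entry points like this might be wired together in a main function, assuming they live in the same package and that g.ParseConfig loads the config file (as the /config/reload handler in Example #5 suggests); this wiring is illustrative, not taken from the project:

func main() {
	g.ParseConfig("cfg.json") // path is a placeholder

	go StartSocket()     // telnet-style listener; returns immediately if disabled
	go StartRpc()        // JSON-RPC listener (Example #3)
	go startHttpServer() // HTTP admin endpoints (Example #9)

	select {} // block forever; each listener runs its own accept loop
}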
Example #2
func newInfluxdbConnPool(name string, address string, connTimeout time.Duration, maxConns int, maxIdle int) *ConnPool {
	pool := NewConnPool(name, address, maxConns, maxIdle)
	pool.Username = g.Config().Influxdb.Username
	pool.Password = g.Config().Influxdb.Password
	pool.Database = g.Config().Influxdb.Database
	pool.Precision = "s"

	pool.New = func(connName string) (NConn, error) {
		nconn := InfluxdbClient{
			name:      connName,
			Address:   pool.Address,
			Username:  pool.Username,
			Password:  pool.Password,
			Database:  pool.Database,
			Precision: pool.Precision,
			Timeout:   connTimeout,
		}
		err := nconn.Connect()
		if err != nil {
			return nil, err
		}

		return nconn, nil
	}

	return pool
}
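The pool delegates connection creation to its New field: a factory closure that captures connTimeout from the constructor, so every connection created later uses that value. A stripped-down sketch of the same factory-closure pattern using only standard-library types:

package main

import (
	"io"
	"net"
	"time"
)

type Pool struct {
	New func(addr string) (io.Closer, error)
}

func newPool(timeout time.Duration) *Pool {
	p := &Pool{}
	p.New = func(addr string) (io.Closer, error) {
		// timeout is captured by the closure, not stored on the pool
		return net.DialTimeout("tcp", addr, timeout)
	}
	return p
}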
Example #3
func StartRpc() {
	if !g.Config().Rpc.Enabled {
		return
	}

	addr := g.Config().Rpc.Listen
	tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
	if err != nil {
		log.Fatalf("net.ResolveTCPAddr fail: %s", err)
	}

	listener, err := net.ListenTCP("tcp", tcpAddr)
	if err != nil {
		log.Fatalf("listen %s fail: %s", addr, err)
	}
	log.Println("rpc listening", addr)

	server := rpc.NewServer()
	server.Register(new(Transfer))

	for {
		conn, err := listener.Accept()
		if err != nil {
			log.Println("listener.Accept occur error:", err)
			continue
		}
		// go rpc.ServeConn(conn)
		go server.ServeCodec(jsonrpc.NewServerCodec(conn))
	}
}
Example #4
// Push data into some Graph's send queue; which Graph receives it is decided by consistent hashing.
// During a data migration, besides the original cluster, a copy is also sent to the new one
// (dedup where old and new overlap, so one item is never sent to the same Graph twice).
func Push2GraphSendQueue(items []*cmodel.MetaData, migrating bool) {
	cfg := g.Config().Graph

	for _, item := range items {
		graphItem, err := convert2GraphItem(item)
		if err != nil {
			log.Println("E:", err)
			continue
		}
		pk := item.PK()

		// statistics. Placed here for efficiency, so tracing only works while graph is enabled.
		proc.RecvDataTrace.Trace(pk, item)
		proc.RecvDataFilter.Filter(pk, item.Value, item)

		node, err := GraphNodeRing.GetNode(pk)
		if err != nil {
			log.Println("E:", err)
			continue
		}

		cnode := cfg.Cluster2[node]
		errCnt := 0
		for _, addr := range cnode.Addrs {
			Q := GraphQueues[node+addr]
			if !Q.PushFront(graphItem) {
				errCnt += 1
			}
		}

		// statistics
		if errCnt > 0 {
			proc.SendToGraphDropCnt.Incr()
		}

		if migrating {
			migratingNode, err := GraphMigratingNodeRing.GetNode(pk)
			if err != nil {
				log.Println("E:", err)
				continue
			}

			if node != migratingNode { // dedup between old and new clusters during migration
				cnodem := cfg.ClusterMigrating2[migratingNode]
				errCnt := 0
				for _, addr := range cnodem.Addrs {
					MQ := GraphMigratingQueues[migratingNode+addr]
					if !MQ.PushFront(graphItem) {
						errCnt += 1
					}
				}

				// statistics
				if errCnt > 0 {
					proc.SendToGraphMigratingDropCnt.Incr()
				}
			}
		}
	}
}
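GraphNodeRing.GetNode maps an item's primary key to a node name via consistent hashing, so the same key always lands on the same Graph while most keys keep their node when the cluster changes. A self-contained toy ring for illustration (the project uses its own implementation with configurable replicas):

package main

import (
	"fmt"
	"hash/crc32"
	"sort"
	"strconv"
)

type Ring struct {
	hashes []uint32          // sorted virtual-node hashes
	owner  map[uint32]string // virtual-node hash -> real node
}

func NewRing(replicas int, nodes []string) *Ring {
	r := &Ring{owner: make(map[uint32]string)}
	for _, n := range nodes {
		for i := 0; i < replicas; i++ {
			h := crc32.ChecksumIEEE([]byte(n + "#" + strconv.Itoa(i)))
			r.hashes = append(r.hashes, h)
			r.owner[h] = n
		}
	}
	sort.Slice(r.hashes, func(i, j int) bool { return r.hashes[i] < r.hashes[j] })
	return r
}

func (r *Ring) GetNode(pk string) string {
	h := crc32.ChecksumIEEE([]byte(pk))
	i := sort.Search(len(r.hashes), func(i int) bool { return r.hashes[i] >= h })
	if i == len(r.hashes) { // wrap around the ring
		i = 0
	}
	return r.owner[r.hashes[i]]
}

func main() {
	ring := NewRing(500, []string{"graph-00", "graph-01"})
	fmt.Println(ring.GetNode("host01/cpu.idle"))
}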
Example #5
func configCommonRoutes() {
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("ok\n"))
	})

	http.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(fmt.Sprintf("%s\n", g.VERSION)))
	})

	http.HandleFunc("/workdir", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(fmt.Sprintf("%s\n", file.SelfDir())))
	})

	http.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
		RenderDataJson(w, g.Config())
	})

	http.HandleFunc("/config/reload", func(w http.ResponseWriter, r *http.Request) {
		if strings.HasPrefix(r.RemoteAddr, "127.0.0.1") {
			g.ParseConfig(g.ConfigFile)
			RenderDataJson(w, "ok")
		} else {
			RenderDataJson(w, "no privilege")
		}
	})
}
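Note that r.RemoteAddr has the form "host:port", so the prefix check matches "127.0.0.1:54321" but rejects the IPv6 loopback "[::1]:54321". A stricter check, as a sketch rather than what the project ships:

func isLoopback(remoteAddr string) bool {
	host, _, err := net.SplitHostPort(remoteAddr)
	if err != nil {
		return false
	}
	ip := net.ParseIP(host)
	return ip != nil && ip.IsLoopback()
}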
Example #6
func initSendQueues() {
	cfg := g.Config()
	for node := range cfg.Judge.Cluster {
		Q := nlist.NewSafeListLimited(DefaultSendQueueMaxSize)
		JudgeQueues[node] = Q
	}
	if cfg.Influxdb.Enabled {
		for tnode := range cfg.Influxdb.Cluster {
			Q := nlist.NewSafeListLimited(DefaultSendQueueMaxSize)
			InfluxdbQueues[tnode] = Q
		}
	}

	for node, nitem := range cfg.Graph.Cluster2 {
		for _, addr := range nitem.Addrs {
			Q := nlist.NewSafeListLimited(DefaultSendQueueMaxSize)
			GraphQueues[node+addr] = Q
		}
	}

	if cfg.Graph.Migrating && cfg.Graph.ClusterMigrating != nil {
		for node, cnode := range cfg.Graph.ClusterMigrating2 {
			for _, addr := range cnode.Addrs {
				Q := nlist.NewSafeListLimited(DefaultSendQueueMaxSize)
				GraphMigratingQueues[node+addr] = Q
			}
		}
	}
}
Example #7
// Periodic Judge task: sends data buffered in the Judge send queue to Judge via the RPC connection pool.
func forward2JudgeTask(Q *list.SafeListLimited, node string, concurrent int) {
	batch := g.Config().Judge.Batch // at most batch items per send
	addr := g.Config().Judge.Cluster[node]
	sema := nsema.NewSemaphore(concurrent)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}

		judgeItems := make([]*cmodel.JudgeItem, count)
		for i := 0; i < count; i++ {
			judgeItems[i] = items[i].(*cmodel.JudgeItem)
		}

		// send via synchronous Call with bounded concurrency
		sema.Acquire()
		go func(addr string, judgeItems []*cmodel.JudgeItem, count int) {
			defer sema.Release()

			resp := &cmodel.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < 3; i++ { // retry up to 3 times
				err = JudgeConnPools.Call(addr, "Judge.Send", judgeItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				time.Sleep(time.Millisecond * 10)
			}

			// statistics
			if !sendOk {
				log.Printf("send judge %s:%s fail: %v", node, addr, err)
				proc.SendToJudgeFailCnt.IncrBy(int64(count))
			} else {
				proc.SendToJudgeCnt.IncrBy(int64(count))
			}
		}(addr, judgeItems, count)
	}
}
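nsema.NewSemaphore bounds how many Judge.Send calls are in flight at once; the loop keeps popping batches while earlier sends are still running. In plain Go the same primitive is a buffered channel, roughly (a sketch, not the nsema implementation):

type Semaphore chan struct{}

func NewSemaphore(n int) Semaphore { return make(Semaphore, n) }

func (s Semaphore) Acquire() { s <- struct{}{} } // blocks once n holders exist
func (s Semaphore) Release() { <-s }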
Example #8
func initNodeRings() {
	cfg := g.Config()

	JudgeNodeRing = newConsistentHashNodesRing(cfg.Judge.Replicas, KeysOfMap(cfg.Judge.Cluster))
	GraphNodeRing = newConsistentHashNodesRing(cfg.Graph.Replicas, KeysOfMap(cfg.Graph.Cluster))
	if cfg.Graph.Migrating && cfg.Graph.ClusterMigrating != nil {
		GraphMigratingNodeRing = newConsistentHashNodesRing(cfg.Graph.Replicas, KeysOfMap(cfg.Graph.ClusterMigrating))
	}
}
Example #9
func startHttpServer() {
	if !g.Config().Http.Enabled {
		return
	}

	addr := g.Config().Http.Listen
	if addr == "" {
		return
	}

	configCommonRoutes()
	configProcHttpRoutes()
	configDebugHttpRoutes()
	configApiHttpRoutes()

	s := &http.Server{
		Addr:           addr,
		MaxHeaderBytes: 1 << 30,
	}

	log.Println("http.startHttpServer ok, listening", addr)
	log.Fatalln(s.ListenAndServe())
}
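The server above allows headers up to 1 << 30 bytes (1 GiB) and sets no timeouts, which is generous for an admin endpoint. A hardened variant of the literal, with limits that are illustrative choices rather than the project's:

s := &http.Server{
	Addr:           addr,
	ReadTimeout:    10 * time.Second,
	WriteTimeout:   10 * time.Second,
	MaxHeaderBytes: 1 << 20, // 1 MiB
}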
Example #10
// TODO: add control over the send tasks, e.g. the ability to stop them.
func startSendTasks() {
	cfg := g.Config()
	// init semaphore
	influxdbConcurrent := cfg.Influxdb.MaxIdle
	judgeConcurrent := cfg.Judge.MaxIdle
	graphConcurrent := cfg.Graph.MaxIdle
	if influxdbConcurrent < 1 {
		influxdbConcurrent = 1
	}

	if judgeConcurrent < 1 {
		judgeConcurrent = 1
	}

	if graphConcurrent < 1 {
		graphConcurrent = 1
	}

	// init send go-routines
	if cfg.Influxdb.Enabled {
		for node := range cfg.Influxdb.Cluster {
			queue := InfluxdbQueues[node]
			go forward2InfluxdbTask(queue, node, influxdbConcurrent)
		}
	}
	for node := range cfg.Judge.Cluster {
		queue := JudgeQueues[node]
		go forward2JudgeTask(queue, node, judgeConcurrent)
	}

	for node, nitem := range cfg.Graph.Cluster2 {
		for _, addr := range nitem.Addrs {
			queue := GraphQueues[node+addr]
			go forward2GraphTask(queue, node, addr, graphConcurrent)
		}
	}

	if cfg.Graph.Migrating {
		for node, cnodem := range cfg.Graph.ClusterMigrating2 {
			for _, addr := range cnodem.Addrs {
				queue := GraphMigratingQueues[node+addr]
				go forward2GraphMigratingTask(queue, node, addr, graphConcurrent)
			}
		}
	}

}
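One way to satisfy the TODO above is to thread a context.Context into each forward2*Task loop and check it between batches. A hypothetical stoppable loop skeleton (not part of the project):

func forwardLoop(ctx context.Context, pop func() []interface{}, send func([]interface{})) {
	for {
		select {
		case <-ctx.Done():
			return // stop was requested via cancel()
		default:
		}
		items := pop()
		if len(items) == 0 {
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		send(items)
	}
}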
Example #11
// Periodic Graph task: redundant sending of data during a data migration.
func forward2GraphMigratingTask(Q *list.SafeListLimited, node string, addr string, concurrent int) {
	batch := g.Config().Graph.Batch // at most batch items per send
	sema := nsema.NewSemaphore(concurrent)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}

		graphItems := make([]*cmodel.GraphItem, count)
		for i := 0; i < count; i++ {
			graphItems[i] = items[i].(*cmodel.GraphItem)
		}

		sema.Acquire()
		go func(addr string, graphItems []*cmodel.GraphItem, count int) {
			defer sema.Release()

			resp := &cmodel.SimpleRpcResponse{}
			var err error
			sendOk := false
			for i := 0; i < 3; i++ { // retry up to 3 times
				err = GraphMigratingConnPools.Call(addr, "Graph.Send", graphItems, resp)
				if err == nil {
					sendOk = true
					break
				}
				time.Sleep(time.Millisecond * 10) // send failed; back off for 10ms
			}

			// statistics
			if !sendOk {
				log.Printf("send to graph migrating %s:%s fail: %v", node, addr, err)
				proc.SendToGraphMigratingFailCnt.IncrBy(int64(count))
			} else {
				proc.SendToGraphMigratingCnt.IncrBy(int64(count))
			}
		}(addr, graphItems, count)
	}
}
Example #12
func initConnPools() {
	cfg := g.Config()

	if cfg.Influxdb.Enabled {
		influxdbInstances := nset.NewStringSet()
		for _, instance := range cfg.Influxdb.Cluster {
			influxdbInstances.Add(instance)
		}
		InfluxdbConnPools = cpool.CreateInfluxdbCliPools(cfg.Influxdb.MaxConns, cfg.Influxdb.MaxIdle,
			cfg.Influxdb.ConnTimeout, cfg.Influxdb.CallTimeout, influxdbInstances.ToSlice())

	}
	judgeInstances := nset.NewStringSet()
	for _, instance := range cfg.Judge.Cluster {
		judgeInstances.Add(instance)
	}
	JudgeConnPools = cpool.CreateSafeRpcConnPools(cfg.Judge.MaxConns, cfg.Judge.MaxIdle,
		cfg.Judge.ConnTimeout, cfg.Judge.CallTimeout, judgeInstances.ToSlice())

	// graph
	graphInstances := nset.NewSafeSet()
	for _, nitem := range cfg.Graph.Cluster2 {
		for _, addr := range nitem.Addrs {
			graphInstances.Add(addr)
		}
	}
	GraphConnPools = cpool.CreateSafeRpcConnPools(cfg.Graph.MaxConns, cfg.Graph.MaxIdle,
		cfg.Graph.ConnTimeout, cfg.Graph.CallTimeout, graphInstances.ToSlice())

	// graph migrating
	if cfg.Graph.Migrating && cfg.Graph.ClusterMigrating != nil {
		graphMigratingInstances := nset.NewSafeSet()
		for _, cnode := range cfg.Graph.ClusterMigrating2 {
			for _, addr := range cnode.Addrs {
				graphMigratingInstances.Add(addr)
			}
		}
		GraphMigratingConnPools = cpool.CreateSafeRpcConnPools(cfg.Graph.MaxConns, cfg.Graph.MaxIdle,
			cfg.Graph.ConnTimeout, cfg.Graph.CallTimeout, graphMigratingInstances.ToSlice())
	}
}
Example #13
// Periodic Tsdb task: sends data buffered in the Tsdb send queue to Tsdb via the API connection pool.
// A single cluster may list several influxdb addresses; sending to them is done concurrently.
func forward2InfluxdbTask(Q *list.SafeListLimited, node string, concurrent int) {
	cfg := g.Config()
	batch := cfg.Influxdb.Batch // at most batch items per send
	sema := nsema.NewSemaphore(concurrent * len(cfg.Influxdb.Cluster2[node].Addrs))
	retry := cfg.Influxdb.MaxRetry

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			time.Sleep(DefaultSendTaskSleepInterval)
			continue
		}
		pts := make([]*client.Point, count)
		for i := 0; i < count; i++ {
			pts[i] = items[i].(*client.Point)
		}
		for _, addr := range cfg.Influxdb.Cluster2[node].Addrs {
			sema.Acquire()
			go coreSend2Influxdb(addr, sema, retry, pts)
		}

	}
}
Example #14
// Push data into every Tsdb send queue; the Tsdb instances back each other up.
func Push2TsdbSendQueue(items []*cmodel.MetaData) {
	removeMetrics := g.Config().Influxdb.RemoveMetrics
	//log.Printf("Push2TsdbSendQueue")
	for _, item := range items {
		b, ok := removeMetrics[item.Metric]
		if ok && b { // metric is configured for removal; skip it
			continue
		}
		influxPoint := Convert2InfluxPoint(item)
		errCnt := 0
		for _, Q := range InfluxdbQueues {
			if !Q.PushFront(influxPoint) {
				errCnt += 1
			}
		}

		// statistics
		if errCnt > 0 {
			proc.SendToInfluxdbDropCnt.Incr()
		}

	}
}
Example #15
func socketTelnetHandle(conn net.Conn) {
	defer conn.Close()

	items := []*cmodel.MetaData{}
	buf := bufio.NewReader(conn)

	cfg := g.Config()
	timeout := time.Duration(cfg.Socket.Timeout) * time.Second

	for {
		conn.SetReadDeadline(time.Now().Add(timeout))
		line, err := buf.ReadString('\n')
		if err != nil {
			break
		}

		line = strings.TrimRight(line, "\r\n") // trim CRLF so "quit" from telnet clients matches

		if line == "quit" {
			break
		}

		if line == "" {
			continue
		}

		t := strings.Fields(line)
		if len(t) < 2 {
			continue
		}

		cmd := t[0]

		if cmd != "update" {
			continue
		}

		item, err := convertLine2MetaData(t[1:])
		if err != nil {
			continue
		}

		items = append(items, item)
	}

	// statistics
	proc.SocketRecvCnt.IncrBy(int64(len(items)))
	proc.RecvCnt.IncrBy(int64(len(items)))

	if cfg.Graph.Enabled {
		sender.Push2GraphSendQueue(items, cfg.Graph.Migrating)
	}

	if cfg.Judge.Enabled {
		sender.Push2JudgeSendQueue(items)
	}
	if cfg.Influxdb.Enabled {
		sender.Push2TsdbSendQueue(items)
	}

}
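A sketch of a client session against this handler: each line is "update" followed by fields in whatever layout convertLine2MetaData expects (not shown in this excerpt), and "quit" ends the session; the address is a placeholder:

func sendDemo() error {
	conn, err := net.Dial("tcp", "127.0.0.1:4444") // address is a placeholder
	if err != nil {
		return err
	}
	defer conn.Close()

	fmt.Fprintln(conn, "update <fields understood by convertLine2MetaData>")
	fmt.Fprintln(conn, "quit")
	return nil
}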
Example #16
// process new metric values
func RecvMetricValues(args []*cmodel.MetricValue, reply *cmodel.TransferResponse, from string) error {
	start := time.Now()
	reply.Invalid = 0

	items := []*cmodel.MetaData{}
	for _, v := range args {
		if v == nil {
			reply.Invalid += 1
			continue
		}

		// Legacy issue: old agents reported metric=kernel.hostname with string
		// values, which are no longer supported, so it is hard-coded and filtered out here.
		if v.Metric == "kernel.hostname" {
			reply.Invalid += 1
			continue
		}

		if v.Metric == "" || v.Endpoint == "" {
			reply.Invalid += 1
			continue
		}

		if v.Type != g.COUNTER && v.Type != g.GAUGE && v.Type != g.DERIVE {
			reply.Invalid += 1
			continue
		}

		if v.Value == "" {
			reply.Invalid += 1
			continue
		}

		if v.Step <= 0 {
			reply.Invalid += 1
			continue
		}

		if len(v.Metric)+len(v.Tags) > 510 {
			reply.Invalid += 1
			continue
		}

		// TODO: this should be handled more elegantly.
		now := start.Unix()
		if v.Timestamp <= 0 || v.Timestamp > now*2 {
			v.Timestamp = now
		}

		fv := &cmodel.MetaData{
			Metric:      v.Metric,
			Endpoint:    v.Endpoint,
			Timestamp:   v.Timestamp,
			Step:        v.Step,
			CounterType: v.Type,
			Tags:        cutils.DictedTagstring(v.Tags), // TODO: limit the number of tag key-value pairs
		}

		valid := true
		var vv float64
		var err error

		switch cv := v.Value.(type) {
		case string:
			vv, err = strconv.ParseFloat(cv, 64)
			if err != nil {
				valid = false
			}
		case float64:
			vv = cv
		case int64:
			vv = float64(cv)
		default:
			valid = false
		}

		if !valid {
			reply.Invalid += 1
			continue
		}

		fv.Value = vv
		items = append(items, fv)
	}

	// statistics
	cnt := int64(len(items))
	proc.RecvCnt.IncrBy(cnt)
	if from == "rpc" {
		proc.RpcRecvCnt.IncrBy(cnt)
	} else if from == "http" {
		proc.HttpRecvCnt.IncrBy(cnt)
	}

	cfg := g.Config()

	if cfg.Graph.Enabled {
		sender.Push2GraphSendQueue(items, cfg.Graph.Migrating)
	}

	if cfg.Influxdb.Enabled {
		sender.Push2TsdbSendQueue(items)
	}

	if cfg.Judge.Enabled {
		sender.Push2JudgeSendQueue(items)
	}

	reply.Message = "ok"
	reply.Total = len(args)
	reply.Latency = (time.Now().UnixNano() - start.UnixNano()) / 1000000

	return nil
}
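StartRpc (Example #3) serves this handler over JSON-RPC. A sketch of a client call; the method name "Transfer.Update" is an assumption based on the Transfer receiver registered there, and the address is a placeholder:

func reportDemo(metrics []*cmodel.MetricValue) error {
	client, err := jsonrpc.Dial("tcp", "127.0.0.1:8433") // address is a placeholder
	if err != nil {
		return err
	}
	defer client.Close()

	var resp cmodel.TransferResponse
	if err := client.Call("Transfer.Update", metrics, &resp); err != nil {
		return err
	}
	log.Printf("total=%d invalid=%d latency=%dms", resp.Total, resp.Invalid, resp.Latency)
	return nil
}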