Пример #1
0
// proxy forwards an RPC call to one of the configured hbs hosts.
//
// Hosts are visited in rotated order starting at a random offset. The loop
// stops at the first successful call or once every host has been visited.
// A host whose counter for "<addr>.<method>" has reached HbsMaxConns is
// skipped. The returned error is that of the last attempted call, or a
// generic "not available" error when no call was attempted at all.
func proxy(method string, args interface{}, reply interface{}) error {
	err := fmt.Errorf("proxy connections not available")
	sendOk := false
	rint := rand.Int()
	for i := 0; i < HbsNum && !sendOk; i++ {
		// Reduce rint modulo HbsNum before adding i: rand.Int may return a
		// value close to the maximum int, and i+rint could then wrap around
		// to a negative number and produce an out-of-range index.
		idx := (i + rint%HbsNum) % HbsNum
		host := HbsHostnames[idx]
		addr := HbsMap[host]

		// Skip hosts that are slow to establish connections; otherwise they
		// would severely limit the sending rate.
		key := addr + "." + method
		cc := pfc.GetCounterCount(key)
		if cc >= HbsMaxConns {
			continue
		}

		// The counter is raised for the duration of the call so that
		// concurrent callers can see how many calls are pending per host.
		pfc.Counter(key, 1)
		err = ConnPools.Call(addr, method, args, reply)
		pfc.Counter(key, -1)

		if err == nil {
			pfc.Meter(key+".ok", 1)
			sendOk = true
		} else {
			pfc.Meter(key+".error", 1)
		}
	}
	return err
}
Пример #2
0
// socketTelnetHandle serves a single line-oriented client connection.
//
// It reads newline-terminated lines until a read fails (which includes the
// per-read deadline of cfg.Socket.Timeout seconds expiring) or the client
// sends "quit". Lines of the form "update <fields...>" are converted with
// convertLine2MetaData and collected; every other line, and every line that
// fails conversion, is ignored. Once the loop ends the number of collected
// items is metered and, if the transfer is enabled, the items are pushed to
// the send queue. The connection is always closed on return.
func socketTelnetHandle(conn net.Conn) {
	defer conn.Close()

	items := []*cmodel.MetaData{}
	buf := bufio.NewReader(conn)

	cfg := g.Config()
	timeout := time.Duration(cfg.Socket.Timeout) * time.Second

	for {
		// Refresh the deadline before each read so an idle client is
		// eventually disconnected.
		conn.SetReadDeadline(time.Now().Add(timeout))
		line, err := buf.ReadString('\n')
		if err != nil {
			break
		}

		// Telnet clients terminate lines with "\r\n". Strip all surrounding
		// whitespace, not only "\n", so that "quit\r" is recognised.
		line = strings.TrimSpace(line)

		if line == "quit" {
			break
		}

		if line == "" {
			continue
		}

		t := strings.Fields(line)
		if len(t) < 2 {
			continue
		}

		cmd := t[0]

		if cmd != "update" {
			continue
		}

		item, err := convertLine2MetaData(t[1:])
		if err != nil {
			continue
		}

		items = append(items, item)
	}

	// statistics
	count := int64(len(items))
	pfc.Meter("SocketRecv", count)
	pfc.Meter("Recv", count)

	if cfg.Transfer.Enabled {
		sender.Push2SendQueue(items)
	}
}
Пример #3
0
// net_task_worker consumes tasks from ch and executes them against the
// remote node at addr using the shared RPC client.
//
// Supported methods:
//   - NET_TASK_M_SEND:  send_data for task.Key.
//   - NET_TASK_M_QUERY: query_data with task.Args / task.Reply.
//   - NET_TASK_M_PULL:  fetch_rrd for task.Key, or send_data instead while
//     flushrrd_timeout is non-zero.
//
// Any other method yields an "error net task method" error. The outcome of
// each task is metered, counted in stat_cnt and, when task.Done is non-nil,
// sent on task.Done. The worker returns once ch is closed and drained.
// idx is not used by the body.
func net_task_worker(idx int, ch chan *Net_task_t, client **rpc.Client, addr string) {
	// Ranging over the channel ends the loop when ch is closed. A bare
	// receive would keep yielding nil tasks and panic on task.Method.
	for task := range ch {
		var err error

		switch task.Method {
		case NET_TASK_M_SEND:
			if err = send_data(client, task.Key, addr); err != nil {
				pfc.Meter("migrate.send.err", 1)
				atomic.AddUint64(&stat_cnt[SEND_S_ERR], 1)
			} else {
				pfc.Meter("migrate.send.ok", 1)
				atomic.AddUint64(&stat_cnt[SEND_S_SUCCESS], 1)
			}

		case NET_TASK_M_QUERY:
			if err = query_data(client, addr, task.Args, task.Reply); err != nil {
				pfc.Meter("migrate.query.err", 1)
				atomic.AddUint64(&stat_cnt[QUERY_S_ERR], 1)
			} else {
				pfc.Meter("migrate.query.ok", 1)
				atomic.AddUint64(&stat_cnt[QUERY_S_SUCCESS], 1)
			}

		case NET_TASK_M_PULL:
			if atomic.LoadInt32(&flushrrd_timeout) != 0 {
				// hope this is faster than fetch_rrd
				if err = send_data(client, task.Key, addr); err != nil {
					pfc.Meter("migrate.sendbusy.err", 1)
					atomic.AddUint64(&stat_cnt[SEND_S_ERR], 1)
				} else {
					pfc.Meter("migrate.sendbusy.ok", 1)
					atomic.AddUint64(&stat_cnt[SEND_S_SUCCESS], 1)
				}
			} else if err = fetch_rrd(client, task.Key, addr); err != nil {
				if os.IsNotExist(err) {
					pfc.Meter("migrate.scprrd.null", 1)
					// The file does not exist: flush the cached data
					// straight to local storage.
					atomic.AddUint64(&stat_cnt[FETCH_S_ISNOTEXIST], 1)
					store.GraphItems.SetFlag(task.Key, 0)
					CommitByKey(task.Key)
				} else {
					pfc.Meter("migrate.scprrd.err", 1)
					// warning: for any other failure the cached data
					// keeps piling up.
					atomic.AddUint64(&stat_cnt[FETCH_S_ERR], 1)
				}
			} else {
				pfc.Meter("migrate.scprrd.ok", 1)
				atomic.AddUint64(&stat_cnt[FETCH_S_SUCCESS], 1)
			}

		default:
			err = errors.New("error net task method")
		}

		// Report the result to the submitter if it asked for one.
		if task.Done != nil {
			task.Done <- err
		}
	}
}
Пример #4
0
// basic emits a set of sample metrics every ten seconds, forever. It
// demonstrates the Meter, Gauge and GaugeFloat64 calls with random values.
func basic() {
	const interval = 10 * time.Second

	for range time.Tick(interval) {
		// (common) Meter: accumulates a sum and derives a rate of change.
		// Typical uses: counting home page visits, GC runs, and so on.
		visits := int64(rand.Int() % 100)
		pfc.Meter("test.meter", visits)
		pfc.Meter("test.meter.2", visits-50)

		// (common) Gauge: stores an instantaneous numeric value.
		// Typical uses: queue length, CPU utilisation, and so on.
		depth := int64(rand.Int()%100 - 50)
		pfc.Gauge("test.gauge", depth)

		// Random value in [0, 100) with two decimal places.
		utilisation := float64(rand.Int()%10000) / 100.0
		pfc.GaugeFloat64("test.gauge.float64", utilisation)
	}
}
Пример #5
0
// monitor runs one status-calculation pass via _monitor, logs when it
// started and how many whole seconds it took, and records the run in the
// "MonitorCronCnt" meter and the "MonitorCronTs" gauge.
func monitor() {
	begin := time.Now().Unix()
	_monitor()
	// Duration at one-second resolution, derived from Unix timestamps.
	elapsed := time.Now().Unix() - begin

	log.Printf("monitor, startTs %s, time-consuming %d sec\n", ntime.FormatTs(begin), elapsed)

	// statistics
	pfc.Meter("MonitorCronCnt", 1)
	pfc.Gauge("MonitorCronTs", elapsed)
}
Пример #6
0
// Push2SendQueue hands every item to the receive trace and receive filter
// (keyed by the item's PK) and then pushes it onto the front of SenderQueue.
// Each item the queue refuses is counted in the "SendDrop" meter.
func Push2SendQueue(items []*cmodel.MetaData) {
	for i := range items {
		md := items[i]

		// statistics
		key := md.PK()
		g.RecvDataTrace.Trace(key, md)
		g.RecvDataFilter.Filter(key, md.Value, md)

		if pushed := SenderQueue.PushFront(md); pushed {
			continue
		}

		// statistics: the queue did not accept the item
		pfc.Meter("SendDrop", 1)
	}
}
Пример #7
0
// reconnection replaces *client with a freshly dialed connection to addr.
//
// The existing client, if any, is closed first. Dialing uses a one-second
// timeout and is retried every 500ms until it succeeds, so this call does
// not return while addr stays unreachable. Every call bumps the
// "migrate.reconnection.<addr>" meter and CONN_S_ERR; every dial attempt
// bumps CONN_S_DIAL.
//
// TODO addr to node
func reconnection(client **rpc.Client, addr string) {
	pfc.Meter("migrate.reconnection."+addr, 1)
	atomic.AddUint64(&stat_cnt[CONN_S_ERR], 1)

	if old := *client; old != nil {
		old.Close()
	}

	for {
		var err error
		*client, err = dial(addr, time.Second)
		atomic.AddUint64(&stat_cnt[CONN_S_DIAL], 1)
		if err == nil {
			return
		}

		// danger!! blocks the calling goroutine until a dial succeeds
		time.Sleep(time.Millisecond * 500)
	}
}
Пример #8
0
// alarmJudge is the alarm delivery loop. Every ten seconds it drains
// alarmCache, concatenates the pending alarms (one per line) and sends the
// resulting text through each enabled channel: mail, sms and callback.
// Every successful delivery is counted in the matching "MonitorAlarm*"
// meter; failures are logged. The function never returns.
func alarmJudge() {
	interval := time.Duration(10) * time.Second
	for {
		time.Sleep(interval)
		var content bytes.Buffer

		keys := alarmCache.Keys()
		if len(keys) == 0 {
			continue
		}
		for _, key := range keys {
			// Another goroutine may have removed the entry since Keys().
			aitem, found := alarmCache.GetAndRemove(key)
			if !found {
				continue
			}
			content.WriteString(aitem.(*Alarm).String() + "\n")
		}

		// Nothing worth sending in this round (the 6-byte threshold is kept
		// as found). Skip the round instead of returning: a return here
		// would end the loop and silently stop all future alarm delivery.
		if content.Len() < 6 {
			continue
		}
		text := content.String()

		cfg := g.Config()
		// mail
		if cfg.Mail.Enable {
			// A hostname lookup failure only leaves the subject tag empty.
			hn, _ := os.Hostname()
			mailContent := formAlarmMailContent(cfg.Mail.Receivers, "AntEye.Alarm.From.["+hn+"]",
				text, "AntEye")
			err := sendMail(cfg.Mail.Url, mailContent)
			if err != nil {
				log.Println("alarm send mail error, mail:", mailContent, "", err)
			} else {
				// statistics
				pfc.Meter("MonitorAlarmMail", 1)
			}
		}

		// sms
		if cfg.Sms.Enable {
			smsContent := formAlarmSmsContent(cfg.Sms.Receivers, text, "AntEye")
			err := sendSms(cfg.Sms.Url, smsContent)
			if err != nil {
				log.Println("alarm send sms error, sms:", smsContent, "", err)
			} else {
				// statistics
				pfc.Meter("MonitorAlarmSms", 1)
			}
		}

		// callback
		if cfg.Callback.Enable {
			err := alarmCallback(cfg.Callback.Url, text)
			if err != nil {
				log.Println("alarm callback error, callback:", cfg.Callback, ", content:", text, "", err)
			} else {
				// statistics
				pfc.Meter("MonitorAlarmCallback", 1)
			}
		}
	}
}
Пример #9
0
// RecvMetricValues validates a batch of incoming metric values, converts the
// valid ones to cmodel.MetaData and, when the transfer is enabled, pushes
// them to the send queue.
//
// reply is filled with the number of rejected entries (ErrInvalid), the
// batch size (Total), the processing time in milliseconds (Latency) and the
// message "ok". from names the ingestion path; "rpc" and "http" each get a
// dedicated meter. The returned error is always nil.
func RecvMetricValues(args []*cmodel.MetricValue, reply *g.TransferResp, from string) error {
	start := time.Now()
	now := start.Unix()
	reply.ErrInvalid = 0

	items := []*cmodel.MetaData{}
	for _, v := range args {
		// Field checks, evaluated in order. The "kernel.hostname" entry is a
		// legacy case: old agents report that metric with a string value,
		// which is no longer supported, so it is filtered out by name.
		if v == nil ||
			v.Metric == "kernel.hostname" ||
			v.Metric == "" || v.Endpoint == "" ||
			(v.Type != g.COUNTER && v.Type != g.GAUGE && v.Type != g.DERIVE) ||
			v.Value == "" ||
			v.Step <= 0 ||
			len(v.Metric)+len(v.Tags) > 510 {
			reply.ErrInvalid++
			continue
		}

		errtags, tags := cutils.SplitTagsString(v.Tags)
		if errtags != nil {
			reply.ErrInvalid++
			continue
		}

		// TODO: make this more elegant.
		// A missing or implausibly large timestamp is replaced by the
		// receive time.
		if v.Timestamp <= 0 || v.Timestamp > now*2 {
			v.Timestamp = now
		}

		// Only string, float64 and int64 values are accepted.
		var value float64
		switch raw := v.Value.(type) {
		case string:
			parsed, perr := strconv.ParseFloat(raw, 64)
			if perr != nil {
				reply.ErrInvalid++
				continue
			}
			value = parsed
		case float64:
			value = raw
		case int64:
			value = float64(raw)
		default:
			reply.ErrInvalid++
			continue
		}

		items = append(items, &cmodel.MetaData{
			Metric:      v.Metric,
			Endpoint:    v.Endpoint,
			Timestamp:   v.Timestamp,
			Step:        v.Step,
			CounterType: v.Type,
			Tags:        tags, // TODO: limit the number of tag key/value pairs
			Value:       value,
		})
	}

	// statistics
	accepted := int64(len(items))
	pfc.Meter("Recv", accepted)
	switch from {
	case "rpc":
		pfc.Meter("RpcRecv", accepted)
	case "http":
		pfc.Meter("HttpRecv", accepted)
	}

	if g.Config().Transfer.Enabled {
		sender.Push2SendQueue(items)
	}

	reply.Msg = "ok"
	reply.Total = len(args)
	elapsedNs := time.Now().UnixNano() - start.UnixNano()
	reply.Latency = elapsedNs / 1000000

	return nil
}
Пример #10
0
// forward2TransferTask drains Q forever and forwards its items to the
// transfer hosts in batches.
//
// Each batch (at most cfg.Transfer.Batch items per PopBackBy call) is
// converted and sent from its own goroutine; a semaphore of size concurrent
// bounds how many sends run at once. A send walks the transfer host list
// from a random offset until one call succeeds, and repeats the walk up to
// cfg.Transfer.Retry times (at least once). Hosts whose counter has reached
// cfg.Transfer.MaxConns are skipped. Per-host and global send counters are
// updated with the batch size. The function never returns.
func forward2TransferTask(Q *nlist.SafeListLimited, concurrent int32) {
	cfg := g.Config()
	batch := int(cfg.Transfer.Batch)
	maxConns := int64(cfg.Transfer.MaxConns)
	retry := int(cfg.Transfer.Retry)
	if retry < 1 {
		retry = 1
	}

	sema := nsema.NewSemaphore(int(concurrent))
	transNum := len(TransferHostnames)

	for {
		items := Q.PopBackBy(batch)
		count := len(items)
		if count == 0 {
			// Queue is empty: back off briefly instead of spinning.
			time.Sleep(time.Millisecond * 50)
			continue
		}

		transItems := make([]*cmodel.MetricValue, count)
		for i := 0; i < count; i++ {
			transItems[i] = convert(items[i].(*cmodel.MetaData))
		}

		// Blocks here while the maximum number of sends is in flight.
		sema.Acquire()
		go func(transItems []*cmodel.MetricValue, count int) {
			defer sema.Release()
			var err error

			// Walk the transfer list from a random offset until the data is
			// sent or every host has been tried; the random start eases the
			// load on slow transfers.
			resp := &g.TransferResp{}
			sendOk := false

			for j := 0; j < retry && !sendOk; j++ {
				rint := rand.Int()
				for i := 0; i < transNum && !sendOk; i++ {
					// Reduce rint modulo transNum before adding i:
					// rand.Int may return a value close to the maximum
					// int, and i+rint could then wrap around to a negative
					// number and produce an out-of-range index.
					idx := (i + rint%transNum) % transNum
					host := TransferHostnames[idx]
					addr := TransferMap[host]

					// Skip hosts that are slow to establish connections;
					// otherwise they would severely limit the sending rate.
					cc := pfc.GetCounterCount(host)
					if cc >= maxConns {
						continue
					}

					pfc.Counter(host, 1)
					err = SenderConnPools.Call(addr, "Transfer.Update", transItems, resp)
					pfc.Counter(host, -1)

					if err == nil {
						sendOk = true
						// statistics
						TransferSendCnt[host].IncrBy(int64(count))
					} else {
						// statistics
						TransferSendFailCnt[host].IncrBy(int64(count))
					}
				}
			}

			// statistics
			if !sendOk {
				if cfg.Debug {
					log.Printf("send to transfer fail, connpool:%v", SenderConnPools.Proc())
				}
				pfc.Meter("SendFail", int64(count))
			} else {
				pfc.Meter("Send", int64(count))
			}
		}(transItems, count)
	}
}