//向monitor注册或取消注册,reg为true表示注册,否则是取消注册,返回true表示成功 func (this *HeartBeat) registerSelf(reg bool) bool { conn, err := lib.GetConnection(this.monitorAddr) if err != nil { return false } defer conn.Close() req := "register" if !reg { req = "unregister" } m := map[string]string{"req": req, "ip": lib.GetIp(), "port": this.port, "hostname": lib.GetHostname(), "role": this.role} msg, err := json.Marshal(m) if err != nil { loglib.Error("marshal " + req + " info error " + err.Error()) return false } _, err = conn.Write(tcp_pack.Pack(msg)) if err != nil { loglib.Error("send " + req + " info failed" + err.Error()) return false } else { plen, ret := tcp_pack.UnPack(conn) if plen > 0 { err = json.Unmarshal(ret, &m) r, ok := m["err"] if err == nil && ok && r == "0" { loglib.Info(req + " to monitor success!") return true } loglib.Error(req + " heart beat failed!") } } return false }
//goroutine //clear list & zipping & send_to_buffer func (r Receiver) writeList() { //收尾工作 defer func() { if err := recover(); err != nil { loglib.Error(fmt.Sprintf("receiver panic:%v", err)) } close(r.sendBuffer) }() st := time.Now() var nLines = 0 var id = r.initId() ip := lib.GetIp() var changed = false for logMap := range r.receiveChan { logLine := logMap["line"] changed = false if logLine == "logfile changed" { changed = true } else { r.logList.PushBack(logLine) } nLines = r.logList.Len() //达到指定行数或发现日志rotate //因此每小时只有最后一个包比listBufferSize小 //如果quit时包小于listBufferSize就丢弃,重启后再读 if nLines >= r.listBufferSize || changed { hour := logMap["hour"] repull, ok := logMap["repull"] //兼容补拉 b := r.clearList() //r.sendBuffer <- b ed := time.Now() elapse := ed.Sub(st) loglib.Info(fmt.Sprintf("add a pack, id: %s_%d, lines:%d, elapse: %s", hour, id, nLines, elapse)) //route信息 m := make(map[string]string) m["ip"] = ip m["hour"] = hour m["id"] = fmt.Sprintf("%d", id) m["lines"] = fmt.Sprintf("%d", nLines) m["stage"] = "make pack" m["st"] = st.Format("2006-01-02 15:04:05.000") m["ed"] = ed.Format("2006-01-02 15:04:05.000") m["elapse"] = elapse.String() if ok && repull == "1" { m["repull"] = "1" } if changed { m["done"] = "1" //这种空包用于给那些日志行数正好是listBufferSize倍数的小时标记结束 //设置repull为1以便空包能够不被拦截 if nLines == 0 { m["repull"] = "1" } } vbytes := tcp_pack.Packing(b.Bytes(), m, false) b.Reset() b.Write(vbytes) r.sendBuffer <- b id++ st = time.Now() nLines = 0 } if changed { id = 1 //每小时id刷新 } } if nLines > 0 { loglib.Info(fmt.Sprintf("receiver abandon %d lines", nLines)) } }
func (t *TcpReceiver) handleConnnection(conn net.Conn, wg *sync.WaitGroup) { defer func() { if err := recover(); err != nil { loglib.Error(fmt.Sprintf("tcp receiver connection panic:%v", err)) } conn.Close() wg.Done() }() /* 用于标识收到退出信号后,能否直接退出 只要接收信号时,包没有收完,都是可退出的, 发送方会缓存以后重传; 如果收完了就不能直接退出,可能包已传给下一级处理但是 却告诉发送方发送失败 */ var quit = false //用于标识是否要退出 go lib.HandleQuitSignal(func() { //关闭连接,避免阻塞在网络io上 conn.Close() quit = true }) request := make([]byte, 512*1024) //缓冲为512k var packLen int = 0 currLen := 0 var b = new(bytes.Buffer) var content = new(bytes.Buffer) inAddr := conn.RemoteAddr().String() parts := strings.Split(inAddr, ":") inIp := parts[0] packId := "unkown" var routeInfo map[string]string var rePull = false //是否补拉,如果是补拉就不做重复包检验 loglib.Info("incoming: " + inAddr) outer: for !quit { st := time.Now() if packLen == 0 { conn.SetReadDeadline(time.Now().Add(5 * time.Minute)) time1 := time.Now() //时间打点 // read zlib pack header length buf := make([]byte, 4) _, err := conn.Read(buf) if err != nil { loglib.Warning(fmt.Sprintf("conn:%s, get header len, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time1))) break } l, _ := binary.Uvarint(buf) headerLen := int(l) //get pack header headerBuf := make([]byte, headerLen) time2 := time.Now() _, err = conn.Read(headerBuf) if err != nil { loglib.Warning(fmt.Sprintf("conn:%s, get header, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time2))) break } //是否补拉 route0 := tcp_pack.ParseHeader(headerBuf) if v, ok := route0["repull"]; ok && v == "1" { rePull = true } else { rePull = false } buf = append(buf, headerBuf...) header, _, err := tcp_pack.ExtractHeader(buf) if err != nil { loglib.Error("wrong format header " + string(headerBuf) + " " + err.Error()) conn.Write([]byte("wrong header")) break } packId = tcp_pack.GetPackId(buf) packLen = header.PackLen currLen = 0 routeInfo = make(map[string]string) b = new(bytes.Buffer) content = new(bytes.Buffer) loglib.Info(fmt.Sprintf("conn:%s, start receive pack %s, pack len:%d, header len:%d, header elapse:%s", inAddr, packId, packLen, headerLen, time.Now().Sub(time1))) b.Write(buf) routeInfo["ip"] = lib.GetIp() routeInfo["stage"] = "tcp recv" routeInfo["st"] = st.Format("2006-01-02 15:04:05.000") } //读包体的超时 conn.SetReadDeadline(time.Now().Add(5 * time.Minute)) time3 := time.Now() //read enough bytes for currLen < packLen { requestLen, err := conn.Read(request) if requestLen == 0 || err != nil { //sender有重发机制,所以可丢弃 packLen = 0 //设为0以便读取新的包 ed := time.Now() loglib.Warning(fmt.Sprintf("conn:%s, not full! ip:%s, packid:%s, received:%d, end recv:%s, elapse:%s, body elapse:%s, error:%s", inAddr, inIp, packId, currLen, ed, ed.Sub(st), ed.Sub(time3), err.Error())) break outer //连接出错直接跳出外层循环 } currLen += requestLen content.Write(request[:requestLen]) } if packLen > 0 && currLen >= packLen { //收完马上应答 _, err := conn.Write([]byte("ok")) if err != nil { loglib.Warning(fmt.Sprintf("ip:%s, packid:%s received, but response back error:%s", inIp, packId, err.Error())) } else { loglib.Info(fmt.Sprintf("conn:%s, response to packid:%s", inAddr, packId)) } //避免收到重复包(补拉例外) appeared, ok, code := t.hasAppeared(content) if !ok || rePull { ed := time.Now() routeInfo["ed"] = ed.Format("2006-01-02 15:04:05.000") routeInfo["elapse"] = ed.Sub(st).String() b.Write(content.Bytes()) vbytes := tcp_pack.Packing(b.Bytes(), routeInfo, true) b = bytes.NewBuffer(vbytes) t.buffer <- *b packAppear := PackAppear{time.Now().Unix(), packId} t.mutex.Lock() t.footPrint[code] = packAppear //这里挂过 t.mutex.Unlock() loglib.Info(fmt.Sprintf("conn:%s, finish ip:%s, packid:%s, repull:%v, received:%d, elapse:%s, body elapse:%s", inAddr, inIp, packId, rePull, currLen, ed.Sub(st), ed.Sub(time3))) } else { loglib.Info(fmt.Sprintf("conn:%s, pack %s repeat %s already appear at %s", inAddr, packId, appeared.Id, time.Unix(appeared.Time, 0))) } packLen = 0 } } loglib.Info("conn finish: " + inAddr) }