func (t *TcpReceiver) clearFootPrint(wg *sync.WaitGroup) {
    defer wg.Done()
    ch1 := make(chan bool)       // used for graceful exit
    ch2 := time.After(time.Hour) // used for the periodic cleanup task
    go lib.HandleQuitSignal(func() {
        ch1 <- true
    })
loop:
    for {
        select {
        // watch a channel so we can exit gracefully
        case <-ch1:
            break loop
        case <-ch2:
            // if ch1 becomes readable while this case is still running,
            // select guarantees this case runs to completion first
            now := time.Now().Unix()
            t.mutex.Lock()
            for code, appear := range t.footPrint {
                if now-appear.Time >= 86400 { // drop entries older than one day
                    delete(t.footPrint, code)
                }
            }
            t.saveFootPrint()
            t.mutex.Unlock()
            ch2 = time.After(time.Hour) // re-arm the timer for the next run
        }
    }
    loglib.Info("clear footprint quit!")
}
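// Note: clearFootPrint and every other long-running routine in this section register their
// shutdown callbacks through lib.HandleQuitSignal, whose implementation is not shown here.
// The sketch below is only an assumption of what such a helper might look like (block on
// SIGINT/SIGTERM, then invoke the callback once); it is not the project's actual code.
// imports assumed: "os", "os/signal", "syscall"
func HandleQuitSignal(callback func()) {
    ch := make(chan os.Signal, 1)
    signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
    <-ch // block until a quit signal arrives
    callback()
}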
func (t *TcpReceiver) Start() {
    tcpAddr, err := net.ResolveTCPAddr("tcp4", t.receiveFromAddress)
    lib.CheckError(err)
    listener, err := net.ListenTCP("tcp", tcpAddr)
    lib.CheckError(err)

    wg := &sync.WaitGroup{}
    wg.Add(1)
    go t.clearFootPrint(wg)

    // signal handling for the main routine
    go lib.HandleQuitSignal(func() {
        // on a quit signal close the listener; Accept then returns immediately with a nil conn
        listener.Close()
        loglib.Info("close tcp receiver's listener.")
    })

    defer func() {
        if err := recover(); err != nil {
            loglib.Error(fmt.Sprintf("tcp receiver panic:%v", err))
        }
        loglib.Info("wait connections finish...")
        wg.Wait()
        loglib.Info("all connections have been processed. quit.")
        close(t.buffer) // close the channel
        t.saveFootPrint()
        t.wq.AllDone()
    }()

    for {
        conn, err := listener.Accept()
        if conn == nil { // listener was closed
            break
        }
        lib.CheckError(err)
        wg.Add(1)
        go t.handleConnnection(conn, wg)
    }
}
func (this *HeartBeat) Run() {
    i := 0
    // try to register up to 3 times
    for i < 3 && !this.registerSelf(true) {
        i++
        time.Sleep(10 * time.Second)
    }
    l, err := net.Listen("tcp", ":"+this.port)
    if err != nil {
        loglib.Error("heart beat " + err.Error())
        return
    }
    defer l.Close()

    go lib.HandleQuitSignal(func() {
        this.registerSelf(false)
        l.Close()
        loglib.Info("close heart beat listener.")
    })

    // the heart beat is not critical, so shutdown handling is kept simple
    for {
        conn, err := l.Accept()
        if conn == nil { // listener was closed
            break
        }
        if err != nil {
            loglib.Error("heart beat " + err.Error())
            break
        }
        go this.handleConnection(conn)
    }
    this.wq.AllDone()
}
// re-save documents that previously failed to be written
func (this *MongoDbOutputer) retrySave(wg *sync.WaitGroup, routineId int) {
    var session *mgo.Session
    var coll *mgo.Collection
    // worker routines should Copy or Clone the session; Clone preserves the consistency mode
    if this.session != nil {
        session = this.session.Clone()
    }
    defer func() {
        if session != nil {
            session.Close()
        }
        wg.Done()
        loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d quit.", routineId))
    }()

    var quit = false
    go lib.HandleQuitSignal(func() {
        quit = true
    })

    dateStr := ""
    loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d start", routineId))
    for !quit {
        e := this.fileList.Remove()
        if e != nil {
            filename := e.Value.(string)
            b, err := ioutil.ReadFile(filename)
            if err != nil {
                if _, ok := err.(*os.PathError); !ok {
                    this.fileList.PushBack(filename) // not a path error, retry next time
                }
                loglib.Error(fmt.Sprintf("load cache %s error:%v", filename, err))
            } else {
                m := bson.M{}
                err = json.Unmarshal(b, &m)
                if err != nil {
                    loglib.Error(fmt.Sprintf("unmarshal %s error:%v", filename, err))
                } else {
                    tp, _ := m["type"].(string)
                    date, _ := m["date"].(string)
                    if date != dateStr && session != nil {
                        coll = session.DB(this.db + date).C(this.collection) // one database per day
                        dateStr = date
                    }
                    if tp == "bulk" {
                        data, _ := m["data"].([]interface{})
                        err = this.bulkSaveBson(coll, data...)
                    } else {
                        data, _ := m["data"].(map[string]interface{})
                        sel := bson.M{this.transactionIdKey: data[this.transactionIdKey]}
                        up := bson.M{"$set": data}
                        _, err = this.upsertBson(coll, sel, up)
                    }
                    if err != nil {
                        this.fileList.PushBack(filename)
                        loglib.Error(fmt.Sprintf("re-save cache %s error:%v", filename, err))
                        if session.Ping() != nil {
                            // refresh this routine's session if possible
                            this.reCloneRoutineSession(&session)
                            if session.Ping() == nil {
                                loglib.Info(fmt.Sprintf("retry routine %d re-conn", routineId))
                            }
                        }
                    } else {
                        err = os.Remove(filename)
                        if err != nil {
                            loglib.Error(fmt.Sprintf("remove file: %s error:%v", filename, err))
                        } else {
                            loglib.Info(fmt.Sprintf("cache file: %s send out", filename))
                        }
                    }
                }
            }
        }
        time.Sleep(500 * time.Millisecond)
    }
}
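// retrySave expects each cache file to hold a JSON object with "type", "date" and "data"
// fields. The writer side is not part of this excerpt; the helper below is only a sketch of
// what caching a failed bulk insert could look like under that assumption (cacheFailedDocs
// and its file-naming scheme are hypothetical, not the outputer's real code).
// imports assumed: "encoding/json", "fmt", "io/ioutil", "path/filepath", "time"
func cacheFailedDocs(dir, date string, docs []interface{}) (string, error) {
    m := map[string]interface{}{
        "type": "bulk", // retrySave branches on this field
        "date": date,   // used to pick the per-day database
        "data": docs,
    }
    b, err := json.Marshal(m)
    if err != nil {
        return "", err
    }
    filename := filepath.Join(dir, fmt.Sprintf("mongo_cache_%d.json", time.Now().UnixNano()))
    // the file name would then be pushed onto fileList for the retry routine to pick up
    return filename, ioutil.WriteFile(filename, b, 0644)
}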
func (this *Tailler) taillingCurrent(receiveChan chan map[string]string) {
    var n_lines = ""
    if this.lineNum >= 0 {
        n_lines = fmt.Sprintf("+%d", this.lineNum+1) // skip lines that have already been tailed
    } else {
        n_lines = "0" // start from the end of the file
    }
    var quit = false

    // cleanup work
    defer func() {
        if err := recover(); err != nil {
            loglib.Error(fmt.Sprintf("tailler panic:%v", err))
        }
        // on quit, discard the incomplete pack by rounding the count down to a full batch
        if quit {
            this.lineNum -= this.lineNum % this.recvBufSize
        }
        saveLineRecord(this.recordPath, this.currFile, this.lineNum)
        this.wq.AllDone()
    }()

    // the recorded line number is only used at startup; later files start from the first line
    cmd := exec.Command("tail", "-F", "-n", n_lines, this.currFile)
    n_lines = "+1"
    stdout, err := cmd.StdoutPipe()
    if err != nil {
        loglib.Error("open pipe error")
    }

    // watch for system signals
    go lib.HandleQuitSignal(func() {
        quit = true
        if cmd.Process != nil {
            cmd.Process.Kill() // kill the tail command, otherwise the read loop never ends
        }
    })

    // log rotation detection
    go func() {
        nextHour := this.fileHour.Add(time.Hour)
        nextHourFile := this.getLogFileByTime(nextHour)
        timeToWait := 10 * time.Minute // after the next hour starts, wait at most 10 minutes for its log file
        for {
            if quit {
                break
            }
            if lib.FileExists(nextHourFile) || time.Now().Sub(nextHour) > timeToWait {
                currFile := this.currFile
                totalLines := this.GetTotalLines(currFile)
                loglib.Info(fmt.Sprintf("log rotated! previous file: %s, total lines: %d", currFile, totalLines))
                // switch files before killing tail, so the new tail does not start on the old file name
                this.fileHour = nextHour
                this.currFile = nextHourFile
                nextHour = nextHour.Add(time.Hour)
                nextHourFile = this.getLogFileByTime(nextHour)
                // rotation detected: wait up to one minute for the old file to be fully tailed
                i := 0
                done := false
                for {
                    if this.lineNum >= totalLines {
                        done = true
                    }
                    if done || i > 60 {
                        if cmd.Process != nil {
                            cmd.Process.Kill() // kill the tail command, otherwise the read loop never ends
                        }
                        if done {
                            loglib.Info("finish tail " + currFile)
                        } else {
                            loglib.Info("tail " + currFile + " timeout")
                        }
                        break
                    }
                    i++
                    time.Sleep(time.Second)
                }
            }
            time.Sleep(time.Second)
        }
    }()

outer:
    for {
        currFile := this.currFile // remember the file currently being tailed
        hourStr := this.fileHour.Format(this.hourStrFmt)
        cmd.Start()
        loglib.Info("begin current log: " + currFile)
        rd := bufio.NewReader(stdout)
        for line, err := rd.ReadString('\n'); err == nil; line, err = rd.ReadString('\n') {
            if quit {
                break outer
            }
            this.lineNum++
            m := map[string]string{"hour": hourStr, "line": line}
            receiveChan <- m
            if this.lineNum%this.recvBufSize == 0 {
                saveLineRecord(this.recordPath, currFile, this.lineNum)
            }
        }
        if err := cmd.Wait(); err != nil {
            loglib.Info("wait sys tail error! " + err.Error())
        }
        loglib.Info(fmt.Sprintf("%s tailed %d lines", currFile, this.lineNum))
        if quit {
            break
        }
        // the file has been tailed completely
        m := map[string]string{"hour": hourStr, "line": changeStr}
        receiveChan <- m
        saveLineRecord(this.recordPath, currFile, this.lineNum)
        // begin a new file
        this.lineNum = 0
        cmd = exec.Command("tail", "-F", "-n", n_lines, this.currFile)
        stdout, err = cmd.StdoutPipe()
        if err != nil {
            loglib.Error("open pipe error")
            break
        }
    }
}
func (this *Tailler) taillingPrevious(filePath string, lineNum int, hourStr string, receiveChan chan map[string]string) bool {
    var n_lines = ""
    if lineNum >= 0 {
        n_lines = fmt.Sprintf("+%d", lineNum+1) // skip lines that have already been tailed
    } else {
        n_lines = "0" // start from the end of the file
    }
    loglib.Info("begin previous log: " + filePath + " from line: " + n_lines)
    var quit = false

    // cleanup work
    defer func() {
        if err := recover(); err != nil {
            loglib.Error(fmt.Sprintf("tailler panic:%v", err))
        }
        // on quit, discard the incomplete pack by rounding the count down to a full batch
        if quit {
            lineNum -= lineNum % this.recvBufSize
        }
        saveLineRecord(this.recordPath, filePath, lineNum)
    }()

    // the recorded line number is only used at startup; later files start from the first line
    cmd := exec.Command("tail", "-n", n_lines, filePath)
    stdout, err := cmd.StdoutPipe()
    if err != nil {
        loglib.Error("open pipe error")
    }

    // watch for system signals
    go lib.HandleQuitSignal(func() {
        quit = true
        if cmd.Process != nil {
            cmd.Process.Kill() // kill the tail command, otherwise the read loop never ends
        }
    })

    cmd.Start()
    rd := bufio.NewReader(stdout)
    for line, err := rd.ReadString('\n'); err == nil; line, err = rd.ReadString('\n') {
        if quit {
            break
        }
        lineNum++
        m := map[string]string{"hour": hourStr, "line": line}
        receiveChan <- m
        if lineNum%this.recvBufSize == 0 {
            saveLineRecord(this.recordPath, filePath, lineNum)
        }
    }
    if err := cmd.Wait(); err != nil {
        loglib.Info("wait sys tail error! " + err.Error())
    }
    loglib.Info(fmt.Sprintf("%s tailed %d lines", filePath, lineNum))
    if !quit {
        // the file has been tailed completely
        m := map[string]string{"hour": hourStr, "line": changeStr}
        receiveChan <- m
        saveLineRecord(this.recordPath, filePath, lineNum)
    }
    return quit
}
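// Both tailling routines checkpoint their progress through saveLineRecord, which is not
// included in this excerpt. A minimal sketch, assuming it simply overwrites a small record
// file with the current file name and line count (the record format is an assumption):
// imports assumed: "fmt", "io/ioutil"
func saveLineRecord(recordPath, filename string, lineNum int) {
    record := fmt.Sprintf("%s\t%d\n", filename, lineNum)
    if err := ioutil.WriteFile(recordPath, []byte(record), 0644); err != nil {
        loglib.Error("save line record error: " + err.Error())
    }
}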
func (t *TcpReceiver) handleConnnection(conn net.Conn, wg *sync.WaitGroup) {
    defer func() {
        if err := recover(); err != nil {
            loglib.Error(fmt.Sprintf("tcp receiver connection panic:%v", err))
        }
        conn.Close()
        wg.Done()
    }()
    /*
       quit marks whether we may exit immediately after a quit signal.
       As long as the current pack is not fully received when the signal arrives,
       it is safe to exit: the sender caches the pack and will resend it later.
       Once a pack has been fully received we must not exit right away, because
       the pack may already have been handed to the next stage while the sender
       is told the send failed.
    */
    var quit = false // whether to exit
    go lib.HandleQuitSignal(func() {
        // close the connection to avoid blocking on network I/O
        conn.Close()
        quit = true
    })
    request := make([]byte, 512*1024) // 512k read buffer
    packLen := 0
    currLen := 0
    var b = new(bytes.Buffer)
    var content = new(bytes.Buffer)
    inAddr := conn.RemoteAddr().String()
    parts := strings.Split(inAddr, ":")
    inIp := parts[0]
    packId := "unknown"
    var routeInfo map[string]string
    var rePull = false // whether this is a re-pull; re-pulled packs skip the duplicate check
    loglib.Info("incoming: " + inAddr)
outer:
    for !quit {
        st := time.Now()
        if packLen == 0 {
            conn.SetReadDeadline(time.Now().Add(5 * time.Minute))
            time1 := time.Now() // timing checkpoint
            // read the length of the zlib pack header
            buf := make([]byte, 4)
            _, err := conn.Read(buf)
            if err != nil {
                loglib.Warning(fmt.Sprintf("conn:%s, get header len, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time1)))
                break
            }
            l, _ := binary.Uvarint(buf)
            headerLen := int(l)
            // get the pack header
            headerBuf := make([]byte, headerLen)
            time2 := time.Now()
            _, err = conn.Read(headerBuf)
            if err != nil {
                loglib.Warning(fmt.Sprintf("conn:%s, get header, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time2)))
                break
            }
            // is this a re-pull?
            route0 := tcp_pack.ParseHeader(headerBuf)
            if v, ok := route0["repull"]; ok && v == "1" {
                rePull = true
            } else {
                rePull = false
            }
            buf = append(buf, headerBuf...)
            header, _, err := tcp_pack.ExtractHeader(buf)
            if err != nil {
                loglib.Error("wrong format header " + string(headerBuf) + " " + err.Error())
                conn.Write([]byte("wrong header"))
                break
            }
            packId = tcp_pack.GetPackId(buf)
            packLen = header.PackLen
            currLen = 0
            routeInfo = make(map[string]string)
            b = new(bytes.Buffer)
            content = new(bytes.Buffer)
            loglib.Info(fmt.Sprintf("conn:%s, start receive pack %s, pack len:%d, header len:%d, header elapse:%s", inAddr, packId, packLen, headerLen, time.Now().Sub(time1)))
            b.Write(buf)
            routeInfo["ip"] = lib.GetIp()
            routeInfo["stage"] = "tcp recv"
            routeInfo["st"] = st.Format("2006-01-02 15:04:05.000")
        }
        // deadline for reading the pack body
        conn.SetReadDeadline(time.Now().Add(5 * time.Minute))
        time3 := time.Now()
        // read enough bytes for the whole pack body
        for currLen < packLen {
            requestLen, err := conn.Read(request)
            if requestLen == 0 || err != nil {
                // the sender retries, so the partial pack can be dropped
                packLen = 0 // reset so a new pack can be read
                ed := time.Now()
                loglib.Warning(fmt.Sprintf("conn:%s, not full! ip:%s, packid:%s, received:%d, end recv:%s, elapse:%s, body elapse:%s, error:%s", inAddr, inIp, packId, currLen, ed, ed.Sub(st), ed.Sub(time3), err.Error()))
                break outer // connection error: leave the outer loop directly
            }
            currLen += requestLen
            content.Write(request[:requestLen])
        }
        if packLen > 0 && currLen >= packLen {
            // acknowledge as soon as the pack is complete
            _, err := conn.Write([]byte("ok"))
            if err != nil {
                loglib.Warning(fmt.Sprintf("ip:%s, packid:%s received, but response back error:%s", inIp, packId, err.Error()))
            } else {
                loglib.Info(fmt.Sprintf("conn:%s, response to packid:%s", inAddr, packId))
            }
            // skip duplicate packs (re-pulls are exempt)
            appeared, ok, code := t.hasAppeared(content)
            if !ok || rePull {
                ed := time.Now()
                routeInfo["ed"] = ed.Format("2006-01-02 15:04:05.000")
                routeInfo["elapse"] = ed.Sub(st).String()
                b.Write(content.Bytes())
                vbytes := tcp_pack.Packing(b.Bytes(), routeInfo, true)
                b = bytes.NewBuffer(vbytes)
                t.buffer <- *b
                packAppear := PackAppear{time.Now().Unix(), packId}
                t.mutex.Lock()
                t.footPrint[code] = packAppear // this spot has caused a hang before
                t.mutex.Unlock()
                loglib.Info(fmt.Sprintf("conn:%s, finish ip:%s, packid:%s, repull:%v, received:%d, elapse:%s, body elapse:%s", inAddr, inIp, packId, rePull, currLen, ed.Sub(st), ed.Sub(time3)))
            } else {
                loglib.Info(fmt.Sprintf("conn:%s, pack %s repeat %s already appear at %s", inAddr, packId, appeared.Id, time.Unix(appeared.Time, 0)))
            }
            packLen = 0
        }
    }
    loglib.Info("conn finish: " + inAddr)
}
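// The duplicate check above relies on hasAppeared, which is not shown in this excerpt. A
// possible sketch, assuming the footPrint map is keyed by a digest of the pack body (the
// md5 key and this exact shape are assumptions, not the receiver's real implementation):
// imports assumed: "bytes", "crypto/md5", "fmt"
func (t *TcpReceiver) hasAppeared(content *bytes.Buffer) (PackAppear, bool, string) {
    code := fmt.Sprintf("%x", md5.Sum(content.Bytes())) // digest of the pack body as the map key
    t.mutex.Lock()
    defer t.mutex.Unlock()
    appeared, ok := t.footPrint[code]
    return appeared, ok, code
}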
// goroutine
func (s *Sender) Start() {
    // initialize the fileCacheList exactly once
    once.Do(s.reloadFileCache)

    // cleanup work
    defer func() {
        if err := recover(); err != nil {
            loglib.Error(fmt.Sprintf("sender %d panic:%v", s.id, err))
        }
        s.saveBufferInChan()
        s.connection.close()
        s.wq.AllDone()
    }()

    go s.pickPacks()

    var quit = false
    go lib.HandleQuitSignal(func() {
        quit = true
        s.connection.close()
    })

    // use a fairly long interval (2000 ms) to avoid a tight loop when sending from the
    // file cache keeps failing because there is no connection or some other error
    var sendInterval = time.Duration(2000)
    var timeoutChan = time.After(sendInterval * time.Millisecond)
    for !quit {
        select {
        case b := <-s.memBuffer:
            // send b; on failure put it straight into the file cache
            result := s.sendBuffer(b)
            if !result {
                s.writeToFile(b)
            }
        case <-timeoutChan:
            timeoutChan = time.After(sendInterval * time.Millisecond)
            // send from the file cache
            e := fileList.Remove()
            if e != nil { // file list is not empty
                filename := e.Value.(string)
                data, err := ioutil.ReadFile(filename)
                if err != nil {
                    if _, ok := err.(*os.PathError); !ok {
                        fileList.PushBack(filename)
                    }
                    loglib.Error(fmt.Sprintf("sender%d read file cache %s error:%s", s.id, filename, err.Error()))
                } else {
                    packId := tcp_pack.GetPackId(data)
                    loglib.Info(fmt.Sprintf("sender%d read pack %s from file: %s, len: %d", s.id, packId, filename, len(data)))
                    result := s.sendData2(data)
                    if result {
                        err = os.Remove(filename)
                        lib.CheckError(err)
                        timeoutChan = time.After(time.Millisecond) // sent successfully, no need to keep waiting
                    } else {
                        fileList.PushBack(filename)
                    }
                }
            }
        }
    }
}