Beispiel #1
0
// clearFootPrint periodically purges old entries from t.footPrint until a quit
// signal is received. Once per hour it deletes every entry whose Time is at
// least 86400 seconds in the past and then calls t.saveFootPrint, all while
// holding t.mutex. wg.Done is called when the routine returns.
func (t *TcpReceiver) clearFootPrint(wg *sync.WaitGroup) {
	defer wg.Done()

	quitCh := make(chan bool)          // signalled by the quit handler for a safe exit
	nextPurge := time.After(time.Hour) // fires when the next purge is due
	go lib.HandleQuitSignal(func() {
		quitCh <- true
	})

	for {
		select {
		case <-quitCh:
			// Safe exit requested.
			loglib.Info("clear footprint quit!")
			return
		case <-nextPurge:
			// A purge that has started runs to completion even if quitCh
			// becomes readable meanwhile; quitCh is only looked at again on
			// the next pass through the select.
			nowSec := time.Now().Unix()
			t.mutex.Lock()
			for key, seen := range t.footPrint {
				if nowSec-seen.Time >= 86400 {
					delete(t.footPrint, key)
				}
			}
			t.saveFootPrint()
			t.mutex.Unlock()
			nextPurge = time.After(time.Hour) // re-arm the hourly timer
		}
	}
}
Beispiel #2
0
// Start listens on t.receiveFromAddress and hands every accepted connection to
// t.handleConnnection in its own goroutine. It also launches the footprint
// cleaner. When the accept loop ends (or the function panics), the deferred
// shutdown waits for all routines tracked by the wait group, closes t.buffer,
// saves the footprint and calls t.wq.AllDone.
func (t *TcpReceiver) Start() {
	addr, err := net.ResolveTCPAddr("tcp4", t.receiveFromAddress)
	lib.CheckError(err)
	ln, err := net.ListenTCP("tcp", addr)
	lib.CheckError(err)

	pending := new(sync.WaitGroup)

	pending.Add(1)
	go t.clearFootPrint(pending)

	// Signal handling for the main routine: closing the listener makes the
	// blocked Accept return immediately with a nil conn.
	go lib.HandleQuitSignal(func() {
		ln.Close()
		loglib.Info("close tcp receiver's listener.")
	})

	defer func() {
		if r := recover(); r != nil {
			loglib.Error(fmt.Sprintf("tcp receiver panic:%v", r))
		}

		loglib.Info("wait connections finish...")
		pending.Wait()
		loglib.Info("all connections have been processed. quit.")
		close(t.buffer) // no more packs will be produced
		t.saveFootPrint()

		t.wq.AllDone()
	}()

	for {
		conn, acceptErr := ln.Accept()
		if conn == nil {
			// Listener closed (or Accept failed): leave and run the shutdown.
			return
		}
		lib.CheckError(acceptErr)
		pending.Add(1)
		go t.handleConnnection(conn, pending)
	}
}
Beispiel #3
0
// Run registers this node (up to three attempts, 10 seconds apart), then
// listens on this.port and serves each accepted connection with
// this.handleConnection in its own goroutine. On a quit signal the node is
// unregistered and the listener is closed, which ends the accept loop; after
// the loop this.wq.AllDone is called.
//
// NOTE(review): the early return on a Listen error skips this.wq.AllDone() —
// confirm that is intended.
func (this *HeartBeat) Run() {
	// Try to register at most 3 times.
	for attempt := 0; attempt < 3 && !this.registerSelf(true); attempt++ {
		time.Sleep(10 * time.Second)
	}

	ln, listenErr := net.Listen("tcp", ":"+this.port)
	if listenErr != nil {
		loglib.Error("heart beat " + listenErr.Error())
		return
	}
	defer ln.Close()

	go lib.HandleQuitSignal(func() {
		this.registerSelf(false)
		ln.Close()
		loglib.Info("close heart beat listener.")
	})

	// The heart beat is not critical, so shutdown handling is kept simple.
serve:
	for {
		conn, acceptErr := ln.Accept()
		switch {
		case conn == nil:
			// Listener was closed.
			break serve
		case acceptErr != nil:
			loglib.Error("heart beat " + acceptErr.Error())
			break serve
		default:
			go this.handleConnection(conn)
		}
	}

	this.wq.AllDone()
}
Beispiel #4
0
// Start is the sender's main loop and is meant to run as a goroutine. It
// forwards packs arriving on s.memBuffer (writing them to the file cache when
// sending fails) and, on a timer, retries one cached file from fileList at a
// time. The loop ends once the quit handler has set the quit flag; the
// deferred cleanup then saves what is left in the channel, closes the
// connection and calls s.wq.AllDone.
func (s *Sender) Start() {
	// Initialise the file cache list (guarded by once).
	once.Do(s.reloadFileCache)

	// Cleanup, also executed after a panic.
	defer func() {
		if r := recover(); r != nil {
			loglib.Error(fmt.Sprintf("sender %d panic:%v", s.id, r))
		}

		s.saveBufferInChan()
		s.connection.close()
		s.wq.AllDone()
	}()

	go s.pickPacks()

	quit := false
	go lib.HandleQuitSignal(func() {
		quit = true
		s.connection.close()
	})

	// Fairly long interval, so that sending cached files does not turn into a
	// busy loop when there is no connection or another error keeps occurring.
	const retryInterval = 2000 * time.Millisecond

	fileTimer := time.After(retryInterval)
	for !quit {
		select {
		case pack := <-s.memBuffer:
			if !s.sendBuffer(pack) {
				// Sending failed: put the pack straight into the file cache.
				s.writeToFile(pack)
			}

		case <-fileTimer:
			fileTimer = time.After(retryInterval)

			// Send one pack from the file cache, if there is any.
			elem := fileList.Remove()
			if elem == nil {
				continue // file list is empty
			}
			filename := elem.Value.(string)

			data, readErr := ioutil.ReadFile(filename)
			if readErr != nil {
				// Only non-path errors are queued for another attempt.
				if _, isPathErr := readErr.(*os.PathError); !isPathErr {
					fileList.PushBack(filename)
				}
				loglib.Error(fmt.Sprintf("sender%d read file cache %s error:%s", s.id, filename, readErr.Error()))
				continue
			}

			packId := tcp_pack.GetPackId(data) // debug info
			loglib.Info(fmt.Sprintf("sender%d read pack %s from file: %s, len: %d", s.id, packId, filename, len(data)))

			if !s.sendData2(data) {
				fileList.PushBack(filename)
				continue
			}
			lib.CheckError(os.Remove(filename))
			fileTimer = time.After(time.Millisecond) // sent successfully, no need to wait
		}
	}
}
Beispiel #5
0
// taillingCurrent follows the current log file with an external `tail -F`
// process and forwards every complete line to receiveChan as a map with the
// keys "hour" and "line".
//
// For each line this.lineNum is incremented, and every this.recvBufSize lines
// the position is persisted with saveLineRecord. A background goroutine polls
// once per second for log rotation (the next hour's file exists, or more than
// 10 minutes have passed since the next hour began). On rotation it switches
// this.fileHour/this.currFile, waits up to about a minute for this.lineNum to
// reach the old file's total line count, and then kills the tail process.
// When tail exits without a quit request, a marker line (changeStr) is sent,
// the record is saved, this.lineNum is reset to 0 and a new tail is started on
// this.currFile from its first line.
//
// On a quit signal the tail process is killed and the loop ends. The deferred
// cleanup rounds this.lineNum down to a multiple of this.recvBufSize, saves
// the line record and calls this.wq.AllDone.
//
// NOTE(review): quit, cmd, this.lineNum, this.currFile and this.fileHour are
// shared between this routine, the rotation goroutine and the quit handler
// with no synchronisation visible in this function — confirm this is
// acceptable.
func (this *Tailler) taillingCurrent(receiveChan chan map[string]string) {
	var n_lines = ""
	if this.lineNum >= 0 {
		n_lines = fmt.Sprintf("+%d", this.lineNum+1) // skip the lines that were already tailed
	} else {
		n_lines = "0" // start from the end of the file
	}

	var quit = false
	// Cleanup, also executed after a panic.
	defer func() {
		if err := recover(); err != nil {
			loglib.Error(fmt.Sprintf("tailler panic:%v", err))
		}

		// On quit, discard the incomplete pack by rounding the line number
		// down to a multiple of recvBufSize.
		if quit {
			this.lineNum -= this.lineNum % this.recvBufSize
		}
		saveLineRecord(this.recordPath, this.currFile, this.lineNum)

		this.wq.AllDone()
	}()

	// Only the first tail uses the recorded line number; every later file is
	// read from its first line ("+1").
	cmd := exec.Command("tail", "-F", "-n", n_lines, this.currFile)
	n_lines = "+1"
	stdout, err := cmd.StdoutPipe()

	// NOTE(review): execution continues after a pipe error here, unlike the
	// same check at the bottom of the loop, which breaks — confirm intended.
	if err != nil {
		loglib.Error("open pipe error")
	}

	// Listen for the system quit signal.
	go lib.HandleQuitSignal(func() {
		quit = true
		if cmd.Process != nil {
			cmd.Process.Kill() // kill the tail command, otherwise the read loop cannot terminate
		}
	})

	// Log rotation detection.
	go func() {
		nextHour := this.fileHour.Add(time.Hour)
		nextHourFile := this.getLogFileByTime(nextHour)
		timeToWait := 10 * time.Minute // longest time to wait for the log file once the next hour has begun: 10 minutes
		for {
			if quit {
				break
			}
			if lib.FileExists(nextHourFile) || time.Now().Sub(nextHour) > timeToWait {
				currFile := this.currFile
				totalLines := this.GetTotalLines(currFile)
				loglib.Info(fmt.Sprintf("log rotated! previous file: %s, total lines: %d", currFile, totalLines))

				// Switch files before the kill, so that the new tail started
				// after the kill does not pick up the old file name.
				this.fileHour = nextHour
				this.currFile = nextHourFile
				nextHour = nextHour.Add(time.Hour)
				nextHourFile = this.getLogFileByTime(nextHour)

				// Rotation detected: give the reader about one minute (one
				// check per second) to catch up with the old file.
				i := 0
				done := false
				for {
					if this.lineNum >= totalLines {
						done = true
					}
					if done || i > 60 {
						if cmd.Process != nil {
							cmd.Process.Kill() // kill the tail command, otherwise the read loop cannot terminate
						}
						if done {
							loglib.Info("finish tail " + currFile)
						} else {
							loglib.Info("tail " + currFile + " timeout")
						}
						break
					}
					i++
					time.Sleep(time.Second)
				}
			}
			time.Sleep(time.Second)
		}

	}()

outer:
	for {
		currFile := this.currFile // remember the name of the file being tailed in this pass
		hourStr := this.fileHour.Format(this.hourStrFmt)
		cmd.Start()
		loglib.Info("begin current log: " + currFile)
		rd := bufio.NewReader(stdout)
		// Read until ReadString reports an error; a line returned together
		// with an error is not forwarded.
		for line, err := rd.ReadString('\n'); err == nil; line, err = rd.ReadString('\n') {
			//fmt.Print(line)
			if quit {
				break outer
			}
			this.lineNum++
			m := map[string]string{"hour": hourStr, "line": line}
			receiveChan <- m
			// Persist the position after every full batch of recvBufSize lines.
			if this.lineNum%this.recvBufSize == 0 {
				saveLineRecord(this.recordPath, currFile, this.lineNum)
			}
		}
		if err := cmd.Wait(); err != nil {
			loglib.Info("wait sys tail error!" + err.Error())
		}
		loglib.Info(fmt.Sprintf("%s tailed %d lines", currFile, this.lineNum))
		if quit {
			break
		}
		// The file has been tailed completely: send the change marker.
		m := map[string]string{"hour": hourStr, "line": changeStr}
		receiveChan <- m
		saveLineRecord(this.recordPath, currFile, this.lineNum)
		// Begin a new file.
		this.lineNum = 0
		cmd = exec.Command("tail", "-F", "-n", n_lines, this.currFile)
		stdout, err = cmd.StdoutPipe()

		if err != nil {
			loglib.Error("open pipe error")
			break
		}
	}
}
Beispiel #6
0
// taillingPrevious reads an earlier log file with an external `tail -n`
// process and forwards every complete line to receiveChan as a map with the
// keys "hour" (hourStr) and "line".
//
// lineNum is the number of lines already handled; it is advanced per line and
// persisted with saveLineRecord every this.recvBufSize lines. When the file is
// read to the end without a quit request, a marker line (changeStr) is sent
// and the record is saved. The deferred cleanup saves the record once more,
// after rounding lineNum down to a multiple of this.recvBufSize if a quit was
// requested.
//
// It returns true when a quit signal was received.
func (this *Tailler) taillingPrevious(filePath string, lineNum int, hourStr string, receiveChan chan map[string]string) bool {
	// "0" starts from the end of the file; "+N" skips the lines that were
	// already tailed.
	startArg := "0"
	if lineNum >= 0 {
		startArg = fmt.Sprintf("+%d", lineNum+1)
	}

	loglib.Info("begin previous log: " + filePath + " from line: " + startArg)
	quit := false

	// Cleanup, also executed after a panic.
	defer func() {
		if r := recover(); r != nil {
			loglib.Error(fmt.Sprintf("tailler panic:%v", r))
		}

		// On quit, discard the incomplete pack.
		if quit {
			lineNum -= lineNum % this.recvBufSize
		}
		saveLineRecord(this.recordPath, filePath, lineNum)
	}()

	cmd := exec.Command("tail", "-n", startArg, filePath)
	stdout, pipeErr := cmd.StdoutPipe()
	if pipeErr != nil {
		loglib.Error("open pipe error")
	}

	// Listen for the system quit signal.
	go lib.HandleQuitSignal(func() {
		quit = true
		if cmd.Process != nil {
			// Kill tail, otherwise the read loop below cannot terminate.
			cmd.Process.Kill()
		}
	})

	cmd.Start()
	reader := bufio.NewReader(stdout)
	for {
		line, readErr := reader.ReadString('\n')
		if readErr != nil || quit {
			break
		}
		lineNum++
		receiveChan <- map[string]string{"hour": hourStr, "line": line}
		if lineNum%this.recvBufSize == 0 {
			saveLineRecord(this.recordPath, filePath, lineNum)
		}
	}

	if waitErr := cmd.Wait(); waitErr != nil {
		loglib.Info("wait sys tail error!" + waitErr.Error())
	}
	loglib.Info(fmt.Sprintf("%s tailed %d lines", filePath, lineNum))

	if !quit {
		// The whole file has been tailed: send the change marker.
		receiveChan <- map[string]string{"hour": hourStr, "line": changeStr}
		saveLineRecord(this.recordPath, filePath, lineNum)
	}
	return quit
}
Beispiel #7
0
// handleConnnection serves one sender connection until the peer fails, a read
// times out, or a quit signal closes the connection.
//
// For every pack it reads a 4-byte header length, the header itself and then
// header.PackLen bytes of body. As soon as the body is complete it answers
// "ok". Unless the pack was seen before (t.hasAppeared) — re-pulled packs are
// exempt from that check — the pack is re-packed with routing info, pushed to
// t.buffer and recorded in t.footPrint under t.mutex.
//
// The deferred cleanup recovers from panics, closes conn and calls wg.Done.
func (t *TcpReceiver) handleConnnection(conn net.Conn, wg *sync.WaitGroup) {
	defer func() {
		if err := recover(); err != nil {
			loglib.Error(fmt.Sprintf("tcp receiver connection panic:%v", err))
		}
		conn.Close()
		wg.Done()
	}()

	// quit tells the loop to stop after a quit signal. Exiting while a pack is
	// only partly received is always safe because the sender caches and
	// resends it. A fully received pack must still be processed, otherwise it
	// could be handed to the next stage while the sender is told it failed.
	var quit = false

	go lib.HandleQuitSignal(func() {
		// Close the connection so the loop does not stay blocked on network IO.
		conn.Close()
		quit = true
	})

	// readFull keeps reading until p is completely filled. A single conn.Read
	// may legally return fewer bytes than requested, which would otherwise
	// leave the header length or the header truncated. The error is only
	// reported when p could not be filled.
	readFull := func(p []byte) error {
		for filled := 0; filled < len(p); {
			n, err := conn.Read(p[filled:])
			filled += n
			if err != nil && filled < len(p) {
				return err
			}
		}
		return nil
	}

	request := make([]byte, 512*1024) // 512k read buffer for the pack body

	var packLen int = 0
	currLen := 0
	var b = new(bytes.Buffer)
	var content = new(bytes.Buffer)
	inAddr := conn.RemoteAddr().String()
	parts := strings.Split(inAddr, ":")
	inIp := parts[0]

	packId := "unkown"

	var routeInfo map[string]string
	var rePull = false // true for a re-pull; re-pulled packs skip the duplicate check

	loglib.Info("incoming: " + inAddr)

outer:
	for !quit {

		st := time.Now()
		if packLen == 0 {
			// No pack in progress: read the next header.
			conn.SetReadDeadline(time.Now().Add(5 * time.Minute))
			time1 := time.Now() // timing checkpoint
			// Read the length of the pack header.
			buf := make([]byte, 4)
			err := readFull(buf)
			if err != nil {
				loglib.Warning(fmt.Sprintf("conn:%s, get header len, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time1)))
				break
			}
			l, _ := binary.Uvarint(buf)
			headerLen := int(l)
			// Read the pack header.
			headerBuf := make([]byte, headerLen)
			time2 := time.Now()
			err = readFull(headerBuf)
			if err != nil {
				loglib.Warning(fmt.Sprintf("conn:%s, get header, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time2)))
				break
			}

			// Is this a re-pull?
			route0 := tcp_pack.ParseHeader(headerBuf)
			if v, ok := route0["repull"]; ok && v == "1" {
				rePull = true
			} else {
				rePull = false
			}

			buf = append(buf, headerBuf...)
			header, _, err := tcp_pack.ExtractHeader(buf)
			if err != nil {
				loglib.Error("wrong format header " + string(headerBuf) + " " + err.Error())
				conn.Write([]byte("wrong header"))
				break
			}

			packId = tcp_pack.GetPackId(buf)
			packLen = header.PackLen
			currLen = 0
			routeInfo = make(map[string]string)
			b = new(bytes.Buffer)
			content = new(bytes.Buffer)

			loglib.Info(fmt.Sprintf("conn:%s, start receive pack %s, pack len:%d, header len:%d, header elapse:%s", inAddr, packId, packLen, headerLen, time.Now().Sub(time1)))
			b.Write(buf)

			routeInfo["ip"] = lib.GetIp()
			routeInfo["stage"] = "tcp recv"
			routeInfo["st"] = st.Format("2006-01-02 15:04:05.000")
		}
		// Timeout for reading the pack body.
		conn.SetReadDeadline(time.Now().Add(5 * time.Minute))
		time3 := time.Now()
		// Read until the whole body has arrived.
		for currLen < packLen {
			requestLen, err := conn.Read(request)
			if requestLen == 0 || err != nil {
				// The sender resends on failure, so the partial pack can be dropped.
				packLen = 0 // reset so that a new pack would be read next

				ed := time.Now()
				// err may be nil here (zero-length read), so it is formatted
				// with %v instead of calling err.Error().
				loglib.Warning(fmt.Sprintf("conn:%s, not full! ip:%s, packid:%s, received:%d, end recv:%s, elapse:%s, body elapse:%s, error:%v", inAddr, inIp, packId, currLen, ed, ed.Sub(st), ed.Sub(time3), err))
				break outer // connection problem: leave the outer loop directly
			}
			currLen += requestLen
			content.Write(request[:requestLen])
		}
		if packLen > 0 && currLen >= packLen {
			// Acknowledge as soon as the pack is complete.
			_, err := conn.Write([]byte("ok"))
			if err != nil {
				loglib.Warning(fmt.Sprintf("ip:%s, packid:%s received, but response back error:%s", inIp, packId, err.Error()))
			} else {
				loglib.Info(fmt.Sprintf("conn:%s, response to packid:%s", inAddr, packId))
			}
			// Drop duplicate packs (re-pulls are exempt).
			appeared, ok, code := t.hasAppeared(content)
			if !ok || rePull {
				ed := time.Now()
				routeInfo["ed"] = ed.Format("2006-01-02 15:04:05.000")
				routeInfo["elapse"] = ed.Sub(st).String()
				b.Write(content.Bytes())
				vbytes := tcp_pack.Packing(b.Bytes(), routeInfo, true)
				b = bytes.NewBuffer(vbytes)
				t.buffer <- *b
				packAppear := PackAppear{time.Now().Unix(), packId}
				t.mutex.Lock()
				t.footPrint[code] = packAppear // the original author noted a crash here once
				t.mutex.Unlock()

				loglib.Info(fmt.Sprintf("conn:%s, finish ip:%s, packid:%s, repull:%v, received:%d, elapse:%s, body elapse:%s", inAddr, inIp, packId, rePull, currLen, ed.Sub(st), ed.Sub(time3)))
			} else {
				loglib.Info(fmt.Sprintf("conn:%s, pack %s repeat %s already appear at %s", inAddr, packId, appeared.Id, time.Unix(appeared.Time, 0)))
			}
			packLen = 0
		}

	}
	loglib.Info("conn finish: " + inAddr)
}
Beispiel #8
0
// retrySave re-saves documents whose earlier save failed. It runs as a
// goroutine until a quit signal is received, handling at most one cached file
// from this.fileList every 500 milliseconds.
//
// Each cache file holds a JSON document with the keys "type", "date" and
// "data". "bulk" documents are saved with bulkSaveBson, all others are
// upserted by this.transactionIdKey. The target database is this.db + date
// (one database per day). A file that was saved is removed from disk; a file
// that failed to save is pushed back onto the list for another attempt.
//
// wg.Done is called, and the routine's session closed, when the routine exits.
func (this *MongoDbOutputer) retrySave(wg *sync.WaitGroup, routineId int) {
	var session *mgo.Session
	var coll *mgo.Collection
	// A routine normally works on its own copy or clone of the session; a
	// clone keeps consistency.
	if this.session != nil {
		session = this.session.Clone()
	}

	defer func() {
		if session != nil {
			session.Close()
		}
		wg.Done()
		loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d quit.", routineId))
	}()

	var quit = false
	go lib.HandleQuitSignal(func() {
		quit = true
	})

	dateStr := ""
	loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d start", routineId))
	for !quit {
		e := this.fileList.Remove()
		if e != nil {
			filename := e.Value.(string)
			b, err := ioutil.ReadFile(filename)
			if err != nil {
				if _, ok := err.(*os.PathError); !ok {
					this.fileList.PushBack(filename) // not a path error: try again later
				}
				loglib.Error(fmt.Sprintf("load cache %s error:%v", filename, err))
			} else {
				m := bson.M{}
				err = json.Unmarshal(b, &m)
				if err != nil {
					loglib.Error(fmt.Sprintf("unmarshar %s error:%v", filename, err))
				} else {
					tp, _ := m["type"].(string)
					date, _ := m["date"].(string)
					if date != dateStr && session != nil {
						coll = session.DB(this.db + date).C(this.collection) // one database per day
						dateStr = date
					}
					if tp == "bulk" {
						data, _ := m["data"].([]interface{})
						err = this.bulkSaveBson(coll, data...)
					} else {
						data, _ := m["data"].(map[string]interface{})
						sel := bson.M{this.transactionIdKey: data[this.transactionIdKey]}
						up := bson.M{"$set": data}
						_, err = this.upsertBson(coll, sel, up)
					}
					if err != nil {
						this.fileList.PushBack(filename)
						loglib.Error(fmt.Sprintf("re-save cache %s error:%v", filename, err))
						switch {
						case session == nil:
							// No session could be cloned when the routine
							// started. Calling Ping on a nil session would
							// panic, so try to obtain a session instead.
							if this.session != nil {
								session = this.session.Clone()
								dateStr = ""
							}
						case session.Ping() != nil:
							// Refresh the routine's session if possible.
							this.reCloneRoutineSession(&session)
							// coll still belongs to the previous session;
							// clearing dateStr makes the next file rebuild it.
							dateStr = ""
							if session != nil && session.Ping() == nil {
								loglib.Info(fmt.Sprintf("retry routine %d re-conn", routineId))
							}
						}
					} else {
						err = os.Remove(filename)
						if err != nil {
							loglib.Error(fmt.Sprintf("remove file: %s error:%v", filename, err))
						} else {
							loglib.Info(fmt.Sprintf("cache file: %s send out", filename))
						}

					}
				}
			}
		}
		time.Sleep(500 * time.Millisecond)
	}
}