//用于检验session可用性并适时重连的routine func //用于重连main session func (this *MongoDbOutputer) reConnMongoDb() { nPingFail := 0 //ping失败次数 reDial := false for { reDial = false if this.session == nil { //session未初始化 reDial = true }else if(this.session.Ping() != nil) { //session连接丢失 nPingFail++ if nPingFail == 3 { reDial = true } } if reDial { nPingFail = 0 this.session = initMongoDbSession(this.mongosAddr) if this.session == nil { loglib.Info("session re-dial failed!") }else{ loglib.Info("session re-dial success!") } } time.Sleep(time.Second) } }
func (this *MongoDbOutputer) runParse( routineId int, wg *sync.WaitGroup) { var session *mgo.Session //routine一般都要copy或clone session,clone能保证一致性 if this.session != nil { session = this.session.Clone() } defer func(){ if session != nil { session.Close() } wg.Done() loglib.Info(fmt.Sprintf("mongodb outputer parse routine %d quit", routineId)) }() loglib.Info(fmt.Sprintf("mongodb outputer parse routine %d start", routineId)) for b := range this.buffer { r, packId, date, err := this.extract(&b) if err == nil { if !this.isUpsert { this.bulkSave(&session, r, packId, date, routineId) }else{ this.upsert(&session, r, packId, date, routineId) } r.Close() } } }
func (s *Sender) saveBufferInChan() { loglib.Info(fmt.Sprintf("sender%d begin to save pack in chan", s.id)) i := 0 for b := range s.memBuffer { s.writeToFile(b) i++ } loglib.Info(fmt.Sprintf("sender%d saved num of pack in chan: %d", s.id, i)) }
func (s Sender) sendData(data []byte, conn *net.TCPConn) bool { if len(data) == 0 { return true } if conn == nil { return false } /* lenBuf := make([]byte, 4) nData := len(data) binary.PutUvarint(lenBuf, uint64(nData)) data = append(lenBuf, data...) */ st := time.Now() packId := tcp_pack.GetPackId(data) conn.SetDeadline(time.Now().Add(5 * time.Minute)) //设置超时 loglib.Info(fmt.Sprintf("sender%d start sending pack:%s length:%d", s.id, packId, len(data))) n, err := conn.Write(data) ed := time.Now() loglib.Info(fmt.Sprintf("sender%d end sending pack:%s length:%d elapse:%s", s.id, packId, n, ed.Sub(st))) lib.CheckError(err) //写失败了就不用等应答了,肯定拿不到 if err == nil { conn.SetReadDeadline(time.Now().Add(8 * time.Minute)) //设置超时 time1 := time.Now() var temp []byte = make([]byte, 128) count, err := conn.Read(temp) if err == nil { loglib.Info(fmt.Sprintf("sender%d get anwser data len:%d for pack:%s elapse:%s", s.id, count, packId, time.Now().Sub(time1))) } else { loglib.Info(fmt.Sprintf("sender%d get anwser data len:%d for pack:%s elapse:%s, error:%s", s.id, count, packId, time.Now().Sub(time1), err.Error())) } temp = temp[:count] if string(temp) == "ok" { //发送成功 return true } else if string(temp) == "wrong header" { //包头错误,丢弃 loglib.Info(packId + " has wrong header, retry later!") return false } else { //发送失败 //报警 return false } } else { loglib.Warning(fmt.Sprintf("write pack %s error:%s", packId, err.Error())) } return false }
//处理注册请求 func (lr *LogReceiver) handleRegister(registerInfo map[string]string) map[string]string { req, ok := registerInfo["req"] m := map[string]string{"err": "-1", "msg": "unkown request <" + req + ">"} ip, _ := registerInfo["ip"] port, _ := registerInfo["port"] hostname, _ := registerInfo["hostname"] role, _ := registerInfo["role"] addr := ip loglib.Info(ip + ":" + port + " " + req) if ok && (req == "register" || req == "unregister") { if port != "" { addr = ip + ":" + port } if req == "register" { ret, err := lr.register(ip, port, role, hostname) if ret { lr.mutex.Lock() lr.ipRoleMap[addr] = role lr.mutex.Unlock() m["err"] = "0" m["msg"] = "success" } else { m["err"] = "1" if err != nil { m["msg"] = err.Error() } else { m["msg"] = "done" } } } else if req == "unregister" { ret, err := lr.unRegister(ip, port) if ret { lr.mutex.Lock() delete(lr.ipRoleMap, addr) lr.mutex.Unlock() m["err"] = "0" m["msg"] = "success" } else { m["err"] = "1" if err != nil { m["msg"] = err.Error() } else { m["msg"] = "done" } } } } loglib.Info("[monitor] " + ip + ":" + port + " " + role + " " + req + ": " + m["msg"]) return m }
func (e *etlOutputer) doEtl(fkeyChan chan string, logDataDir string, etlDir string, etlDoneDir string, etlFailDir string, spiderList string, colsFile string, hostsList string, ipBlackList string, wg *sync.WaitGroup) { defer func() { if err := recover(); err != nil { loglib.Error(fmt.Sprintf("doEtl() panic:%v", err)) } wg.Done() }() loglib.Info("etl routine start") for fkey := range fkeyChan { sv := etl.NewFileSaver(colsFile, etlDir, fkey) d := etl.NewDispatcher(sv, 6, hostsList, ipBlackList) g := etl.NewGlobalHao123(spiderList, 100, 200, 8, d) go g.Start(false) fname := filepath.Join(logDataDir, fkey) loglib.Info("start etl for " + fname) err := g.ParseFile(fname) g.Wait() // etl success // mark success if err == nil { //采用循环,增加打tag的成功率 for i := 0; i < 5; i++ { fd, err := os.Create(filepath.Join(etlDoneDir, fkey)) if err == nil { fd.Close() loglib.Info("finish etl for " + fname) break } else { loglib.Warning("mark etl done for " + fname + " failed! error: " + err.Error()) } } } else { //采用循环,增加打tag的成功率 for i := 0; i < 5; i++ { fd, err := os.Create(filepath.Join(etlFailDir, fkey)) if err == nil { fd.Close() loglib.Info("failed etl for " + fname) break } else { loglib.Warning("mark etl fail for " + fname + " failed! error: " + err.Error()) } } } } loglib.Info("etl routine finish") }
func (t *TcpReceiver) clearFootPrint(wg *sync.WaitGroup) { defer wg.Done() ch1 := make(chan bool) //用于安全退出 ch2 := time.After(time.Hour) //用于定时任务 go lib.HandleQuitSignal(func() { ch1 <- true }) loop: for { select { //监听一个chan以便安全退出 case <-ch1: break loop case <-ch2: //若这个case未执行完,而ch1已可读,select会保证这个case执行完 now := time.Now().Unix() t.mutex.Lock() for code, appear := range t.footPrint { if now-appear.Time >= 86400 { delete(t.footPrint, code) } } t.saveFootPrint() t.mutex.Unlock() ch2 = time.After(time.Hour) //用于定时任务 } } loglib.Info("clear footprint quit!") }
//载入cache文件列表 func (this *MongoDbOutputer) reloadFileCache() { list := lib.GetFilelist(this.file_mem_folder_name) for _,filename := range list { loglib.Info("reloading:" + filename) this.fileList.PushBack(filename) } }
func (this *WaitQuit) Quit() bool { ret := false select { case <-this.ch: loglib.Info(this.name + " safe quit.") ret = true case <-time.After(2 * time.Second): loglib.Info(this.name + " quit timeout") this.nTimeout++ if this.allowTimeout > 0 && this.nTimeout >= this.allowTimeout { ret = true } } return ret }
//should be run by once func (s *Sender) reloadFileCache() { list := lib.GetFilelist(s.file_mem_folder_name) for _, filename := range list { // s.fileCacheList.PushBack(filename) loglib.Info("reloading:" + filename) fileList.PushBack(filename) } }
func (t *TcpReceiver) Start() { tcpAddr, err := net.ResolveTCPAddr("tcp4", t.receiveFromAddress) lib.CheckError(err) listener, err := net.ListenTCP("tcp", tcpAddr) lib.CheckError(err) wg := &sync.WaitGroup{} wg.Add(1) go t.clearFootPrint(wg) //主routine信号处理 go lib.HandleQuitSignal(func() { //接收到信号关闭listenner,此时Accept会马上返回一个nil 的conn listener.Close() loglib.Info("close tcp receiver's listener.") }) defer func() { if err := recover(); err != nil { loglib.Error(fmt.Sprintf("tcp receiver panic:%v", err)) } loglib.Info("wait connections finish...") wg.Wait() loglib.Info("all connections have been processed. quit.") close(t.buffer) //关闭chan t.saveFootPrint() t.wq.AllDone() }() for { conn, err := listener.Accept() if conn == nil { break } lib.CheckError(err) wg.Add(1) go t.handleConnnection(conn, wg) } }
func (f *fileOutputer) extract(bp *bytes.Buffer) { buf := make([]byte, 4) bp.Read(buf) l, _ := binary.Uvarint(buf) headerLen := int(l) //get pack header buf = make([]byte, headerLen) bp.Read(buf) header := tcp_pack.ParseHeader(buf) r, err := zlib.NewReader(bp) if err != nil { loglib.Error("zlib reader Error: " + err.Error()) } else { lines, _ := strconv.Atoi(header["lines"]) done := false if header["done"] == "1" { done = true } f.ic.Add(header["ip"], header["hour"], header["id"], lines, done) writerKey := header["ip"] + "_" + header["hour"] fout := f.getWriter(f.writers, f.dataDir, writerKey) //一头一尾写头信息,节省硬盘 buf = append(buf, '\n') //fout.Write(buf) nn, err := io.Copy(fout, r) if err != nil { loglib.Warning(fmt.Sprintf("save %s_%s_%s error:%s, saved:%d", header["ip"], header["hour"], header["id"], err, nn)) } //fout.Write(buf) //单独存一份header便于查数 fout = f.getWriter(f.headerWriters, f.headerDir, writerKey) n, err := fout.Write(buf) if err != nil { loglib.Info(fmt.Sprintf("writer header %s %d %s", writerKey, n, err.Error())) } if done || time.Now().Unix() > f.checkTime.Unix() { hourFinish, _ := f.ic.Check() for ip, hours := range hourFinish { for _, hour := range hours { writerKey = ip + "_" + hour } } f.closeWriters(f.writers) f.closeWriters(f.headerWriters) f.checkTime.Add(2 * time.Minute) } r.Close() } }
func (s *Sender) sendData2(data []byte) bool { result := s.sendData(data, s.connection.getConn()) //发送失败,tcp连接可能已经失效,重新建立tcp连接 if result == false { s.connection.reconnect(s.connection.getConn()) *s.status = -1 loglib.Info(fmt.Sprintf("sender%d reconnected by sendData2(),status:%d", s.id, *s.status)) } return result }
//从公用的chan读pack到私有的chan,若私有chan已满则写入文件缓存 //保证公用chan不会阻塞 func (s *Sender) pickPacks() { for buf := range s.sBuffer { select { case s.memBuffer <- buf: break default: loglib.Info(fmt.Sprintf("sender%d mem buffer is full, total %d, pub chan:%d", s.id, len(s.memBuffer), len(s.sBuffer))) s.writeToFile(buf) } } close(s.memBuffer) }
//更新插入,按字段更新 func (this *MongoDbOutputer) upsert(psession **mgo.Session, r io.Reader, packId string, date string, routineId int) { nDiscard := 0 nCached := 0 nUpdated := 0 nInserted := 0 var coll *mgo.Collection = nil if *psession != nil { coll = (*psession).DB(this.db + date).C(this.collection) //按天分库 } scanner := bufio.NewScanner(r) for scanner.Scan() { line := scanner.Text() m := this.parseLogLine(line) if len(m) > 0 { selector := bson.M{ this.transactionIdKey: m[ this.transactionIdKey ] } up := bson.M{"$set" : m} info, err := this.upsertBson(coll, selector, up) if err != nil { this.cacheData(m, "upsert", date, routineId) nCached++ //ping fail, re-connect, clone main session if (*psession).Ping() != nil { //refresh go-routine's session if possible this.reCloneRoutineSession(psession) if (*psession).Ping() == nil { loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) } } }else{ nInserted++ nUpdated += info.Updated } }else{ nDiscard++ } } loglib.Info(fmt.Sprintf("save pack %s: inserted:%d, updated:%d, cached:%d, discard %d items", packId, nInserted, nUpdated, nCached, nDiscard)) }
//保存footprint func (t *TcpReceiver) saveFootPrint() { vbytes, err := json.Marshal(t.footPrint) if err != nil { loglib.Error("marshal footprint error:" + err.Error()) return } err = ioutil.WriteFile(t.footPrintFile, vbytes, 0664) if err == nil { loglib.Info("save footprint success !") } else { loglib.Error("save footprint error:" + err.Error()) } }
func saveLineRecord(path string, fname string, lineNum int) { fout, err := os.Create(path) defer fout.Close() if err != nil { loglib.Error("save line record error: " + err.Error()) return } _, err = fmt.Fprintf(fout, "%s %d", fname, lineNum) if err != nil { loglib.Error("Write line record error" + err.Error()) return } loglib.Info("save line record success!") }
func (this *IntegrityChecker) SaveStatus() { m := make(map[string]map[string]map[string]map[string]int) m["hour_received"] = this.hourReceived m["day_received"] = this.dayReceived vbytes, err := json.Marshal(m) if err != nil { loglib.Error("marshal log received error:" + err.Error()) return } err = ioutil.WriteFile(this.statusFile, vbytes, 0664) if err == nil { loglib.Info("save log received success !") } else { loglib.Error("save log received error:" + err.Error()) } }
//处理日志上报 func (lr *LogReceiver) handleLog(logInfo map[string]string) { ip, ok := logInfo["ip"] port, ok := logInfo["port"] remoteAddr := ip if port != "" { remoteAddr += ":" + port } role, ok := lr.ipRoleMap[remoteAddr] if !ok { role = "" } res, err := lr.AddLog(logInfo["time"], remoteAddr, role, logInfo["type"], logInfo["msg"]) if err == nil { loglib.Info(fmt.Sprintf("[log receiver] add %d error log", res.NumRows)) } }
func createSingleConnection(address string) (conn *net.TCPConn, err error) { tcpAddr, err := net.ResolveTCPAddr("tcp4", address) lib.CheckError(err) if err != nil { return nil, err } conn, err = net.DialTCP("tcp", nil, tcpAddr) if err != nil { loglib.Error("get connection from " + address + " failed! Error:" + err.Error()) return nil, err } else { loglib.Info("get connection from " + address + " success! remote addr " + conn.RemoteAddr().String()) } lib.CheckError(err) return conn, nil }
func (t *TcpReceiver) loadFootPrint(fname string) map[string]PackAppear { fp := make(map[string]PackAppear) if lib.FileExists(fname) { vbytes, err := ioutil.ReadFile(fname) if err != nil { loglib.Error("read footprint file error:" + err.Error()) } else { err = json.Unmarshal(vbytes, &fp) if err != nil { loglib.Error("unmarshal footprint error:" + err.Error()) } else { loglib.Info("load footprint success !") } } } else { loglib.Warning("footprint file " + fname + " not found!") } return fp }
func getIpRoleMap(dbConn *db.Mysql) map[string]string { sql := "select ip, port, role from " + registerTable res, err := dbConn.Query(sql) m := make(map[string]string) if err != nil { loglib.Error("read registered nodes failed! Error: " + err.Error()) } else { for _, row := range res.Rows { ip := row[0] port := row[1] role := row[2] if port != "" { ip = ip + ":" + port } m[ip] = role } loglib.Info("readed working nodes from database.") } return m }
func (this *MongoDbOutputer) upsertBson(coll *mgo.Collection, selector interface{}, doc interface{}) (info *mgo.ChangeInfo, err error) { m, _ := selector.(bson.M) tid, _ := m[this.transactionIdKey].(string) if coll != nil { info, err = coll.Upsert(selector, doc) if err != nil { loglib.Error(fmt.Sprintf("save bson [%s] error:%v", tid, err)) }else{ if info.Updated > 0 { loglib.Info(fmt.Sprintf("bson [%s] updated", tid)) } } }else{ info = &mgo.ChangeInfo{} err = errors.New("upsert: collection is nil") loglib.Error(fmt.Sprintf("save bson [%s] error:%v", tid, err)) } return }
func (s *Sender) writeToFile(data bytes.Buffer) { //写入文件 filename := createFileName(s.id) //创建文件 _, err := os.Create(filename) lib.CheckError(err) d := data.Bytes() packId := tcp_pack.GetPackId(d) loglib.Info(fmt.Sprintf("sender%d save pack %s to file %s len:%d", s.id, packId, filename, len(d))) err = ioutil.WriteFile(filename, d, 0666) if err != nil { loglib.Warning("write to file " + filename + " error:" + err.Error()) lib.CheckError(err) } else { //追加fileCacheList fileList.PushBack(filename) } }
func (this *IntegrityChecker) LoadStatus(filename string) map[string]map[string]map[string]map[string]int { m := make(map[string]map[string]map[string]map[string]int) m["hour_received"] = make(map[string]map[string]map[string]int) m["day_received"] = make(map[string]map[string]map[string]int) if lib.FileExists(filename) { vbytes, err := ioutil.ReadFile(filename) if err != nil { loglib.Error("read log received file error:" + err.Error()) } else { err = json.Unmarshal(vbytes, &m) if err != nil { loglib.Error("unmarshal log received error:" + err.Error()) } else { loglib.Info("load log received success !") } } } else { loglib.Warning("log received file " + filename + " not found!") } return m }
//缓存写入mongodb失败的数据 //typeStr为bulk或upsert func (this *MongoDbOutputer) cacheData(data interface{}, typeStr string, date string, routineId int) { mp := bson.M{"type": typeStr, "date": date, "data": data} saveTry := 3 b, err := json.Marshal(mp) arr, ok := data.([]bson.M) cnt := 1 if ok { cnt = len(arr) } if err != nil { loglib.Error(fmt.Sprintf("cache data error when marshal, discard %d item(s), error:%v", cnt, err)) return } fname := this.createFileName(routineId) for i:=0; i<saveTry; i++ { err = ioutil.WriteFile(fname, b, 0666) if err == nil { this.fileList.PushBack(fname) loglib.Info(fmt.Sprintf("cache %d bson", cnt)) break } } }
func (this *HeartBeat) Run() { i := 0 //尝试注册3次 for i < 3 && !this.registerSelf(true) { i++ time.Sleep(10 * time.Second) } l, err := net.Listen("tcp", ":"+this.port) if err != nil { loglib.Error("heart beat " + err.Error()) return } defer l.Close() go lib.HandleQuitSignal(func() { this.registerSelf(false) l.Close() loglib.Info("close heart beat listener.") }) //heart beat 不是太重要,所以退出处理比较简单 for { conn, err := l.Accept() if conn == nil { //listener关闭 break } if err != nil { loglib.Error("heart beat " + err.Error()) break } go this.handleConnection(conn) } this.wq.AllDone() }
//向monitor注册或取消注册,reg为true表示注册,否则是取消注册,返回true表示成功 func (this *HeartBeat) registerSelf(reg bool) bool { conn, err := lib.GetConnection(this.monitorAddr) if err != nil { return false } defer conn.Close() req := "register" if !reg { req = "unregister" } m := map[string]string{"req": req, "ip": lib.GetIp(), "port": this.port, "hostname": lib.GetHostname(), "role": this.role} msg, err := json.Marshal(m) if err != nil { loglib.Error("marshal " + req + " info error " + err.Error()) return false } _, err = conn.Write(tcp_pack.Pack(msg)) if err != nil { loglib.Error("send " + req + " info failed" + err.Error()) return false } else { plen, ret := tcp_pack.UnPack(conn) if plen > 0 { err = json.Unmarshal(ret, &m) r, ok := m["err"] if err == nil && ok && r == "0" { loglib.Info(req + " to monitor success!") return true } loglib.Error(req + " heart beat failed!") } } return false }
//批量插入 func (this *MongoDbOutputer) bulkSave(psession **mgo.Session, r io.Reader, packId string, date string, routineId int) { var coll *mgo.Collection = nil if *psession != nil { coll = (*psession).DB(this.db + date).C(this.collection) //按天分库 } scanner := bufio.NewScanner(r) arr := make([]interface{}, 0) cnt := 0 nDiscard := 0 nInserted := 0 nCached := 0 for scanner.Scan() { line := scanner.Text() m := this.parseLogLine(line) if len(m) > 0 { arr = append(arr, m) cnt++ if cnt >= this.bulkSize { err := this.bulkSaveBson(coll, arr...) if err != nil { this.cacheData(arr, "bulk", date, routineId) nCached += cnt //ping fail, re-connect, clone main session if (*psession).Ping() != nil { //refresh go-routine's session if possible this.reCloneRoutineSession(psession) if (*psession).Ping() == nil { loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) } } }else{ nInserted += cnt } arr = make([]interface{}, 0) cnt = 0 } }else{ nDiscard++ } } cnt = len(arr) if cnt > 0 { err := this.bulkSaveBson(coll, arr...) if err != nil { this.cacheData(arr, "bulk", date, routineId) nCached += cnt //ping fail, re-connect, clone main session if (*psession).Ping() != nil { //refresh go-routine's session if possible this.reCloneRoutineSession(psession) if (*psession).Ping() == nil { loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) } } }else{ nInserted += cnt } } loglib.Info(fmt.Sprintf("save pack %s: inserted:%d, cached:%d, discard %d items", packId, nInserted, nCached, nDiscard)) }
//重新保存先前失败的文档 func (this *MongoDbOutputer) retrySave(wg *sync.WaitGroup, routineId int) { var session *mgo.Session var coll *mgo.Collection //routine一般都要copy或clone session,clone能保证一致性 if this.session != nil { session = this.session.Clone() } defer func(){ if session != nil { session.Close() } wg.Done() loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d quit.", routineId)) }() var quit = false go lib.HandleQuitSignal(func(){ quit = true }) dateStr := "" loglib.Info(fmt.Sprintf("mongodb outputer retry routine %d start", routineId)) for !quit { e := this.fileList.Remove() if e != nil { filename := e.Value.(string) b, err := ioutil.ReadFile(filename) if err != nil { if _, ok := err.(*os.PathError); !ok { this.fileList.PushBack(filename) //非路径错误,下次再试 } loglib.Error(fmt.Sprintf("load cache %s error:%v", filename, err)) }else{ m := bson.M{} err = json.Unmarshal(b, &m) if err != nil { loglib.Error(fmt.Sprintf("unmarshar %s error:%v", filename, err)) }else{ tp, _ := m["type"].(string) date, _ := m["date"].(string) if date != dateStr && session != nil { coll = session.DB(this.db + date).C(this.collection) //按天分库 dateStr = date } if tp == "bulk" { data, _ := m["data"].([]interface{}) err = this.bulkSaveBson(coll, data...) }else{ data, _ := m["data"].(map[string]interface{}) sel := bson.M{this.transactionIdKey : data[this.transactionIdKey]} up := bson.M{"$set" : data} _, err = this.upsertBson(coll, sel, up) } if err != nil { this.fileList.PushBack(filename) loglib.Error(fmt.Sprintf("re-save cache %s error:%v", filename, err)) if session.Ping() != nil { //refresh go-routine's session if possible this.reCloneRoutineSession(&session) if session.Ping() == nil { loglib.Info(fmt.Sprintf("retry routine %d re-conn", routineId)) } } }else{ err = os.Remove(filename) if err != nil { loglib.Error(fmt.Sprintf("remove file: %s error:%v", filename, err)); }else{ loglib.Info(fmt.Sprintf("cache file: %s send out", filename)); } } } } } time.Sleep(500 * time.Millisecond) } }