// 读取失败记录 func (self *History) ReadFailure(provider string, inherit bool) { self.RWMutex.Lock() self.provider = provider self.RWMutex.Unlock() if !inherit { // 不继承历史记录时 self.Failure.list = make(map[string]map[string]bool) self.Failure.inheritable = false return } else if self.Failure.inheritable { // 本次与上次均继承历史记录时 return } else { // 上次没有继承历史记录,但本次继承时 self.Failure.list = make(map[string]map[string]bool) self.Failure.inheritable = true } var fLen int switch provider { case "mgo": var docs = []interface{}{} s, c, err := mgo.Open(MGO_DB, FAILURE_FILE) if err != nil { logs.Log.Error("从mgo读取成功记录: %v", err) return } c.Find(nil).All(&docs) mgo.Close(s) for _, v := range docs { failure := v.(bson.M)["_id"].(string) req, err := context.UnSerialize(failure) if err != nil { continue } spName := req.GetSpiderName() if _, ok := self.Failure.list[spName]; !ok { self.Failure.list[spName] = make(map[string]bool) } self.Failure.list[spName][failure] = true fLen++ } case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法读取成功记录!") return } rows, err := mysql.New(db.DB). SetTableName("`" + FAILURE_FILE + "`"). SelectAll() if err != nil { // logs.Log.Error("读取Mysql数据库中成功记录失败:%v", err) return } mysql.MysqlPool.Free(db) for rows.Next() { var id int var failure string err = rows.Scan(&id, &failure) req, err := context.UnSerialize(failure) if err != nil { continue } spName := req.GetSpiderName() if _, ok := self.Failure.list[spName]; !ok { self.Failure.list[spName] = make(map[string]bool) } self.Failure.list[spName][failure] = true fLen++ } default: f, err := os.Open(FAILURE_FILE_FULL) if err != nil { return } b, _ := ioutil.ReadAll(f) f.Close() b[0] = '{' json.Unmarshal( append(b, '}'), &self.Failure.list, ) for _, v := range self.Failure.list { fLen += len(v) } } logs.Log.Informational(" * 读出 %v 条失败记录\n", fLen) }
func (self *Failure) flush(provider string) (fLen int) { self.RWMutex.Lock() defer self.RWMutex.Unlock() for _, val := range self.list { fLen += len(val) } switch provider { case "mgo": s, c, err := mgo.Open(MGO_DB, FAILURE_FILE) if err != nil { logs.Log.Error("从mgo读取成功记录: %v", err) return } defer mgo.Close(s) // 删除失败记录文件 c.DropCollection() if fLen == 0 { return } var docs = []interface{}{} for _, val := range self.list { for key := range val { docs = append(docs, map[string]interface{}{"_id": key}) } } c.Insert(docs...) case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!") return 0 } // 删除失败记录文件 stmt, err := db.DB.Prepare(`DROP TABLE ` + FAILURE_FILE) if err != nil { return } stmt.Exec() if fLen == 0 { return } table := mysql.New(db.DB). SetTableName(FAILURE_FILE). AddColumn(`failure MEDIUMTEXT`). Create() for _, val := range self.list { for key := range val { table.AddRow(key).Update() } } mysql.MysqlPool.Free(db) default: // 删除失败记录文件 os.Remove(FAILURE_FILE_FULL) if fLen == 0 { return } f, _ := os.OpenFile(FAILURE_FILE_FULL, os.O_CREATE|os.O_WRONLY, 0660) b, _ := json.Marshal(self.list) b[0] = ',' f.Write(b[:len(b)-1]) f.Close() } return }