// 获取指定蜘蛛在上一次运行时失败的请求 func (self *Failure) PullFailure(spiderName string) (reqs []*context.Request) { if len(self.list[spiderName]) == 0 { return } self.RWMutex.Lock() defer self.RWMutex.Unlock() for failure, _ := range self.list[spiderName] { req, err := context.UnSerialize(failure) if err == nil { reqs = append(reqs, req) } } self.list[spiderName] = make(map[string]bool) return }
// 读取失败记录 func (self *History) ReadFailure(provider string, inherit bool) { self.RWMutex.Lock() self.provider = provider self.RWMutex.Unlock() if !inherit { // 不继承历史记录时 self.Failure.list = make(map[string]map[string]bool) self.Failure.inheritable = false return } else if self.Failure.inheritable { // 本次与上次均继承历史记录时 return } else { // 上次没有继承历史记录,但本次继承时 self.Failure.list = make(map[string]map[string]bool) self.Failure.inheritable = true } var fLen int switch provider { case "mgo": var docs = []interface{}{} s, c, err := mgo.Open(MGO_DB, FAILURE_FILE) if err != nil { logs.Log.Error("从mgo读取成功记录: %v", err) return } c.Find(nil).All(&docs) mgo.Close(s) for _, v := range docs { failure := v.(bson.M)["_id"].(string) req, err := context.UnSerialize(failure) if err != nil { continue } spName := req.GetSpiderName() if _, ok := self.Failure.list[spName]; !ok { self.Failure.list[spName] = make(map[string]bool) } self.Failure.list[spName][failure] = true fLen++ } case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法读取成功记录!") return } rows, err := mysql.New(db.DB). SetTableName("`" + FAILURE_FILE + "`"). SelectAll() if err != nil { // logs.Log.Error("读取Mysql数据库中成功记录失败:%v", err) return } mysql.MysqlPool.Free(db) for rows.Next() { var id int var failure string err = rows.Scan(&id, &failure) req, err := context.UnSerialize(failure) if err != nil { continue } spName := req.GetSpiderName() if _, ok := self.Failure.list[spName]; !ok { self.Failure.list[spName] = make(map[string]bool) } self.Failure.list[spName][failure] = true fLen++ } default: f, err := os.Open(FAILURE_FILE_FULL) if err != nil { return } b, _ := ioutil.ReadAll(f) f.Close() b[0] = '{' json.Unmarshal( append(b, '}'), &self.Failure.list, ) for _, v := range self.Failure.list { fLen += len(v) } } logs.Log.Informational(" * 读出 %v 条失败记录\n", fLen) }