示例#1
0
文件: failure.go 项目: ss7247/pholcus
// PullFailure returns the requests of the given spider that failed during the
// previous run, and then resets that spider's failure set to an empty map.
//
// Stored records that cannot be deserialized are skipped. The result is nil
// when nothing is recorded for spiderName (or when no record deserializes).
func (self *Failure) PullFailure(spiderName string) (reqs []*context.Request) {
	// Lock before the first look at the map: the emptiness check reads
	// self.list as well, so it must sit inside the same critical section as
	// the drain below instead of racing with writers.
	self.RWMutex.Lock()
	defer self.RWMutex.Unlock()

	failures := self.list[spiderName]
	if len(failures) == 0 {
		return nil
	}

	// The map keys are the serialized requests; the bool values are unused.
	for failure := range failures {
		req, err := context.UnSerialize(failure)
		if err != nil {
			continue
		}
		reqs = append(reqs, req)
	}

	// Everything has been handed out, so start over with an empty set.
	self.list[spiderName] = make(map[string]bool)
	return reqs
}
示例#2
0
文件: history.go 项目: Cdim/pholcus
// ReadFailure loads the stored failure records into self.Failure.list.
//
// provider selects where the records are read from: "mgo", "mysql", or, for
// any other value, the local file FAILURE_FILE_FULL. It is also stored in
// self.provider.
//
// inherit controls whether history is carried over:
//   - false: the in-memory records are reset and nothing is read;
//   - true, and the previous call also inherited: the in-memory records are
//     kept as they are and nothing is read;
//   - true, and the previous call did not inherit: the in-memory records are
//     reset and reloaded from the provider.
//
// Failures while reading are not returned to the caller; the function logs
// (where a log call exists) and returns early.
func (self *History) ReadFailure(provider string, inherit bool) {
	self.RWMutex.Lock()
	self.provider = provider
	self.RWMutex.Unlock()

	if !inherit {
		// History is not inherited: drop whatever is in memory.
		self.Failure.list = make(map[string]map[string]bool)
		self.Failure.inheritable = false
		return
	}
	if self.Failure.inheritable {
		// Both the previous call and this one inherit: keep the current records.
		return
	}
	// The previous call did not inherit but this one does: reload from storage.
	self.Failure.list = make(map[string]map[string]bool)
	self.Failure.inheritable = true

	var fLen int

	// record deserializes one stored failure and files it under the name of
	// the spider it belongs to. Records that do not deserialize are skipped.
	record := func(failure string) {
		req, err := context.UnSerialize(failure)
		if err != nil {
			return
		}
		spName := req.GetSpiderName()
		if _, ok := self.Failure.list[spName]; !ok {
			self.Failure.list[spName] = make(map[string]bool)
		}
		self.Failure.list[spName][failure] = true
		fLen++
	}

	switch provider {
	case "mgo":
		var docs = []interface{}{}
		s, c, err := mgo.Open(MGO_DB, FAILURE_FILE)
		if err != nil {
			logs.Log.Error("从mgo读取失败记录: %v", err)
			return
		}
		err = c.Find(nil).All(&docs)
		mgo.Close(s)
		if err != nil {
			logs.Log.Error("从mgo读取失败记录: %v", err)
			return
		}

		for _, v := range docs {
			// Checked assertions: a document of an unexpected shape is
			// skipped instead of panicking.
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			failure, ok := doc["_id"].(string)
			if !ok {
				continue
			}
			record(failure)
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法读取失败记录!")
			return
		}
		rows, err := mysql.New(db.DB).
			SetTableName("`" + FAILURE_FILE + "`").
			SelectAll()

		// Hand the connection back on every path, including the error path
		// below, so a failed query does not leak a pooled connection.
		mysql.MysqlPool.Free(db)

		if err != nil {
			// Logging for this case was commented out in the original code
			// and is left off here.
			return
		}

		for rows.Next() {
			var id int
			var failure string
			if err := rows.Scan(&id, &failure); err != nil {
				continue
			}
			record(failure)
		}

	default:
		f, err := os.Open(FAILURE_FILE_FULL)
		if err != nil {
			return
		}
		b, err := ioutil.ReadAll(f)
		f.Close()
		if err != nil {
			logs.Log.Error("读取失败记录文件出错: %v", err)
			return
		}

		// An empty file holds no records; indexing b[0] would panic.
		if len(b) > 0 {
			// The file content is turned into a JSON object by overwriting
			// its first byte with '{' and appending '}' before decoding.
			b[0] = '{'
			if err := json.Unmarshal(append(b, '}'), &self.Failure.list); err != nil {
				logs.Log.Error("解析失败记录文件出错: %v", err)
			}
		}
		for _, v := range self.Failure.list {
			fLen += len(v)
		}
	}

	logs.Log.Informational(" *     读出 %v 条失败记录\n", fLen)
}