Пример #1
0
// 读取失败记录
func (self *History) ReadFailure(provider string, inherit bool) {
	self.RWMutex.Lock()
	self.provider = provider
	self.RWMutex.Unlock()

	if !inherit {
		// 不继承历史记录时
		self.Failure.list = make(map[string]map[string]bool)
		self.Failure.inheritable = false
		return

	} else if self.Failure.inheritable {
		// 本次与上次均继承历史记录时
		return

	} else {
		// 上次没有继承历史记录,但本次继承时
		self.Failure.list = make(map[string]map[string]bool)
		self.Failure.inheritable = true
	}
	var fLen int
	switch provider {
	case "mgo":
		var docs = []interface{}{}
		s, c, err := mgo.Open(MGO_DB, FAILURE_FILE)
		if err != nil {
			logs.Log.Error("从mgo读取成功记录: %v", err)
			return
		}
		c.Find(nil).All(&docs)

		mgo.Close(s)

		for _, v := range docs {
			failure := v.(bson.M)["_id"].(string)
			req, err := context.UnSerialize(failure)
			if err != nil {
				continue
			}
			spName := req.GetSpiderName()
			if _, ok := self.Failure.list[spName]; !ok {
				self.Failure.list[spName] = make(map[string]bool)
			}
			self.Failure.list[spName][failure] = true
			fLen++
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法读取成功记录!")
			return
		}
		rows, err := mysql.New(db.DB).
			SetTableName("`" + FAILURE_FILE + "`").
			SelectAll()
		if err != nil {
			// logs.Log.Error("读取Mysql数据库中成功记录失败:%v", err)
			return
		}

		mysql.MysqlPool.Free(db)

		for rows.Next() {
			var id int
			var failure string
			err = rows.Scan(&id, &failure)
			req, err := context.UnSerialize(failure)
			if err != nil {
				continue
			}
			spName := req.GetSpiderName()
			if _, ok := self.Failure.list[spName]; !ok {
				self.Failure.list[spName] = make(map[string]bool)
			}
			self.Failure.list[spName][failure] = true
			fLen++
		}

	default:
		f, err := os.Open(FAILURE_FILE_FULL)
		if err != nil {
			return
		}
		b, _ := ioutil.ReadAll(f)
		f.Close()

		b[0] = '{'
		json.Unmarshal(
			append(b, '}'),
			&self.Failure.list,
		)
		for _, v := range self.Failure.list {
			fLen += len(v)
		}

	}
	logs.Log.Informational(" *     读出 %v 条失败记录\n", fLen)
}
Пример #2
0
func (self *Failure) flush(provider string) (fLen int) {
	self.RWMutex.Lock()
	defer self.RWMutex.Unlock()
	for _, val := range self.list {
		fLen += len(val)
	}

	switch provider {
	case "mgo":
		s, c, err := mgo.Open(MGO_DB, FAILURE_FILE)
		if err != nil {
			logs.Log.Error("从mgo读取成功记录: %v", err)
			return
		}
		defer mgo.Close(s)

		// 删除失败记录文件
		c.DropCollection()
		if fLen == 0 {
			return
		}

		var docs = []interface{}{}
		for _, val := range self.list {
			for key := range val {
				docs = append(docs, map[string]interface{}{"_id": key})
			}
		}
		c.Insert(docs...)

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!")
			return 0
		}

		// 删除失败记录文件
		stmt, err := db.DB.Prepare(`DROP TABLE ` + FAILURE_FILE)
		if err != nil {
			return
		}
		stmt.Exec()
		if fLen == 0 {
			return
		}

		table := mysql.New(db.DB).
			SetTableName(FAILURE_FILE).
			AddColumn(`failure MEDIUMTEXT`).
			Create()
		for _, val := range self.list {
			for key := range val {
				table.AddRow(key).Update()
			}
		}
		mysql.MysqlPool.Free(db)

	default:
		// 删除失败记录文件
		os.Remove(FAILURE_FILE_FULL)
		if fLen == 0 {
			return
		}

		f, _ := os.OpenFile(FAILURE_FILE_FULL, os.O_CREATE|os.O_WRONLY, 0660)

		b, _ := json.Marshal(self.list)
		b[0] = ','
		f.Write(b[:len(b)-1])
		f.Close()
	}
	return
}