Пример #1
0
// 添加请求到队列
func (self *scheduler) Push(req *context.Request) {
	// 初始化该蜘蛛的队列
	spiderId, ok := req.GetSpiderId()
	if !ok {
		return
	}

	self.RLock()
	defer self.RUnlock()

	if self.status == status.STOP {
		return
	}

	// 当req不可重复时,有重复则返回
	if !req.GetDuplicatable() && self.Deduplicate(req.GetUrl()+req.GetMethod()) {
		return
	}

	// 初始化该蜘蛛下该优先级队列
	priority := req.GetPriority()
	if !self.foundPriority(spiderId, priority) {
		self.addPriority(spiderId, priority)
	}

	defer func() {
		recover()
	}()

	// 添加请求到队列
	self.queue[spiderId][priority] = append(self.queue[spiderId][priority], req)
}
Пример #2
0
// 添加请求到队列
func (self *Matrix) Push(req *context.Request) {
	sdl.RLock()
	defer sdl.RUnlock()

	if sdl.status == status.STOP ||
		self.maxPage >= 0 ||
		// 当req不可重复下载时,已存在成功记录则返回
		!req.IsReloadable() && !UpsertSuccess(req) {
		return
	}

	// 大致限制加入队列的请求量,并发情况下应该会比maxPage多
	atomic.AddInt64(&self.maxPage, 1)

	priority := req.GetPriority()

	// 初始化该蜘蛛下该优先级队列
	if _, found := self.reqs[priority]; !found {
		self.priorities = append(self.priorities, priority)
		sort.Ints(self.priorities) // 从小到大排序
		self.reqs[priority] = []*context.Request{}
	}

	// 添加请求到队列
	self.reqs[priority] = append(self.reqs[priority], req)
}
Пример #3
0
func (self *SrcManage) Push(req *context.Request) {
	spiderId, ok := req.GetSpiderId()
	if !ok {
		return
	}

	// 初始化该蜘蛛的队列
	if _, ok := self.queue[spiderId]; !ok {
		self.mutex[spiderId] = new(sync.Mutex)
		self.queue[spiderId] = make(map[int][]*context.Request)
	}

	priority := req.GetPriority()

	// 登记该蜘蛛下该优先级队列
	if _, ok := self.queue[spiderId][priority]; !ok {
		self.uIndex(spiderId, priority)
	}

	// 添加请求到队列
	self.queue[spiderId][priority] = append(self.queue[spiderId][priority], req)
}