// 添加请求到队列 func (self *scheduler) Push(req *context.Request) { // 初始化该蜘蛛的队列 spiderId, ok := req.GetSpiderId() if !ok { return } self.RLock() defer self.RUnlock() if self.status == status.STOP { return } // 当req不可重复时,有重复则返回 if !req.GetDuplicatable() && self.Deduplicate(req.GetUrl()+req.GetMethod()) { return } // 初始化该蜘蛛下该优先级队列 priority := req.GetPriority() if !self.foundPriority(spiderId, priority) { self.addPriority(spiderId, priority) } defer func() { recover() }() // 添加请求到队列 self.queue[spiderId][priority] = append(self.queue[spiderId][priority], req) }
// 添加请求到队列 func (self *Matrix) Push(req *context.Request) { sdl.RLock() defer sdl.RUnlock() if sdl.status == status.STOP || self.maxPage >= 0 || // 当req不可重复下载时,已存在成功记录则返回 !req.IsReloadable() && !UpsertSuccess(req) { return } // 大致限制加入队列的请求量,并发情况下应该会比maxPage多 atomic.AddInt64(&self.maxPage, 1) priority := req.GetPriority() // 初始化该蜘蛛下该优先级队列 if _, found := self.reqs[priority]; !found { self.priorities = append(self.priorities, priority) sort.Ints(self.priorities) // 从小到大排序 self.reqs[priority] = []*context.Request{} } // 添加请求到队列 self.reqs[priority] = append(self.reqs[priority], req) }
func (self *SrcManage) Push(req *context.Request) { spiderId, ok := req.GetSpiderId() if !ok { return } // 初始化该蜘蛛的队列 if _, ok := self.queue[spiderId]; !ok { self.mutex[spiderId] = new(sync.Mutex) self.queue[spiderId] = make(map[int][]*context.Request) } priority := req.GetPriority() // 登记该蜘蛛下该优先级队列 if _, ok := self.queue[spiderId][priority]; !ok { self.uIndex(spiderId, priority) } // 添加请求到队列 self.queue[spiderId][priority] = append(self.queue[spiderId][priority], req) }