func (this *Spider) Run() *Spider { for { var req *page.Request = this.m_scheduler.Poll() if req == nil { if this.countHandlingUrl() == 0 { break } time.Sleep(500 * time.Millisecond) //这里需要判断有没有没有处理完的请求,如果都处理完了可以退出,如果还没有处理完,那么继续等待下一个时间片段 continue } //req.GetUrl this.waitForReqProcesser(req.GetUrl(), req.GetUrlTag()) go func(*page.Request) { //deal the page,may get new pages this.pageProcess(req) }(req) } //运行结束 return this }
func (this *QueueScheduler) Push(req *page.Request) { //TODO this.lock.Lock() defer this.lock.Unlock() _, ok := this.rmKeys[req.GetUrl()] if ok { return } this.rmKeys[req.GetUrl()] = req.GetUrlTag() this.queue.PushBack(req) }