Ejemplo n.º 1
0
// Run polls the scheduler for requests and hands each one to pageProcess in
// its own goroutine. It loops until the scheduler returns no request and
// countHandlingUrl reports zero, then returns the receiver so calls can be
// chained.
func (this *Spider) Run() *Spider {
	for {
		var req *page.Request = this.m_scheduler.Poll()

		if req == nil {
			// Nothing is queued right now. If no URL is still being handled
			// the crawl is finished and the loop can exit.
			if this.countHandlingUrl() == 0 {
				break
			}

			// Requests are still being handled (per the original note they
			// may produce new pages), so wait for the next time slice and
			// poll again.
			time.Sleep(500 * time.Millisecond)
			continue
		}

		// NOTE(review): presumably blocks until a processing slot is free and
		// registers the URL as being handled; confirm in waitForReqProcesser.
		this.waitForReqProcesser(req.GetUrl(), req.GetUrlTag())

		// Pass the request as a named argument so the goroutine works on its
		// own copy of the pointer rather than capturing the loop-scoped
		// variable.
		go func(r *page.Request) {
			// Process the page; this may yield new pages.
			this.pageProcess(r)
		}(req)
	}

	// Run finished.
	return this
}
Ejemplo n.º 2
0
// Push enqueues req at the back of the queue unless a request with the same
// URL has already been recorded in rmKeys, in which case the call is a no-op.
// The whole operation runs while holding this.lock.
func (this *QueueScheduler) Push(req *page.Request) {
	//TODO
	this.lock.Lock()
	defer this.lock.Unlock()

	url := req.GetUrl()

	// A URL already present in rmKeys has been pushed before; drop the
	// duplicate.
	if _, seen := this.rmKeys[url]; seen {
		return
	}

	// Record the URL (mapped to its tag) before queuing the request.
	this.rmKeys[url] = req.GetUrlTag()
	this.queue.PushBack(req)
}