Esempio n. 1
0
//自定义函数
func New(taskname string) *Spider {
	mlog.StraceInst().Open()

	ap := &Spider{taskname: taskname}

	// init filelog.
	ap.CloseFileLog()
	ap.exitWhenComplete = true
	ap.sleeptype = "fixed"
	ap.startSleeptime = 0

	// init spider
	if ap.pScheduler == nil {
		ap.SetScheduler(scheduler.NewQueueScheduler(false))
	}

	if ap.pDownloader == nil {
		ap.SetDownloader(downloader.NewHttpDownloader())
	}

	mlog.StraceInst().Println("** start spider **")
	ap.pPiplelines = make([]pipeline.Pipeline, 0)

	return ap
}
func TestQueueScheduler(t *testing.T) {
	var r *request.Request
	r = request.NewRequest("http://baidu.com", "html", "", "GET", "", nil, nil, nil, nil)
	fmt.Printf("%v\n", r)

	var s *scheduler.QueueScheduler
	s = scheduler.NewQueueScheduler(false)

	s.Push(r)
	var count int = s.Count()
	if count != 1 {
		t.Error("count error")
	}
	fmt.Println(count)

	var r1 *request.Request
	r1 = s.Poll()
	if r1 == nil {
		t.Error("poll error")
	}
	fmt.Printf("%v\n", r1)

	// remove duplicate
	s = scheduler.NewQueueScheduler(true)

	r2 := request.NewRequest("http://qq.com", "html", "", "GET", "", nil, nil, nil, nil)
	s.Push(r)
	s.Push(r2)
	s.Push(r)
	count = s.Count()
	if count != 2 {
		t.Error("count error")
	}
	fmt.Println(count)

	r1 = s.Poll()
	if r1 == nil {
		t.Error("poll error")
	}
	fmt.Printf("%v\n", r1)
	r1 = s.Poll()
	if r1 == nil {
		t.Error("poll error")
	}
	fmt.Printf("%v\n", r1)
}
Esempio n. 3
0
func (this *Spider) close() {
	this.SetScheduler(scheduler.NewQueueScheduler(false))
	this.SetDownloader(downloader.NewHttpDownloader())
	this.pPiplelines = make([]pipeline.Pipeline, 0)
	this.exitWhenComplete = true
}