Beispiel #1
0
// main starts the "mailSpider" crawl on the URL passed as the first
// command-line argument.
//
// Besides the crawl itself it launches two background goroutines: one that
// calls loadWhiteAndBlackList about once per second, and one that runs
// doSendMail on the page processor's MailHandle. Neither goroutine is ever
// stopped; they live as long as the process does.
func main() {
	if len(os.Args) < 2 {
		// A missing argument is a usage error: report it on stderr and exit
		// with a non-zero status so shells and scripts can detect the failure.
		fmt.Fprintln(os.Stderr, "need an url as param,like ./main 'http://baidu.com/' ")
		os.Exit(1)
	}

	// Spider input: the PageProcesser, and the task name used in the
	// Pipeline for record.
	myPageProcess := NewMyPageProcesser()
	sp := spider.NewSpider(myPageProcess, "mailSpider")

	// Refresh the white/black lists once per second, waiting first and
	// loading afterwards on every iteration.
	go func() {
		for {
			time.Sleep(time.Second)
			loadWhiteAndBlackList(sp)
		}
	}()
	go doSendMail(myPageProcess.MailHandle)

	// NOTE(review): SetExitWhenComplete(false) presumably keeps the spider
	// alive once the queue drains — confirm against the spider package.
	sp.SetScheduler(scheduler.NewQueueScheduler(true)).
		SetExitWhenComplete(false).
		AddUrl(os.Args[1], "html").
		// AddPipeline(pipeline.NewPipelineConsole()).
		SetThreadnum(3).
		Run()
}
Beispiel #2
0
// TestQueueScheduler pushes requests into a QueueScheduler and checks Count
// and Poll, first on a scheduler built with the flag set to false and then on
// one built with it set to true (the duplicate-removing configuration), where
// pushing the same request twice must still leave only two queued entries.
func TestQueueScheduler(t *testing.T) {
	baidu := request.NewRequest("http://baidu.com", "html", "", "GET", "", nil, nil, nil, nil)
	fmt.Println(baidu)

	queue := scheduler.NewQueueScheduler(false)

	// expectCount flags an error unless the scheduler currently holds want
	// requests, then prints the observed count.
	expectCount := func(want int) {
		got := queue.Count()
		if got != want {
			t.Error("count error")
		}
		fmt.Println(got)
	}

	// pollOne takes one request off the scheduler, flags an error if nothing
	// came back, and prints whatever was returned.
	pollOne := func() {
		polled := queue.Poll()
		if polled == nil {
			t.Error("poll error")
		}
		fmt.Println(polled)
	}

	// Plain queue: a single push yields a single pollable request.
	queue.Push(baidu)
	expectCount(1)
	pollOne()

	// Duplicate-removing queue: the repeated push of baidu must not add a
	// third entry.
	queue = scheduler.NewQueueScheduler(true)
	qq := request.NewRequest("http://qq.com", "html", "", "GET", "", nil, nil, nil, nil)
	for _, req := range []*request.Request{baidu, qq, baidu} {
		queue.Push(req)
	}
	expectCount(2)

	pollOne()
	pollOne()
}