func main() { // spider input: // PageProcesser ; // task name used in Pipeline for record; if len(os.Args) < 2 { fmt.Println("need an url as param,like ./main 'http://baidu.com/' ") return } myPageProcess := NewMyPageProcesser() sp := spider.NewSpider(myPageProcess, "mailSpider") go func() { for { <-time.After(time.Second) loadWhiteAndBlackList(sp) } }() go doSendMail(myPageProcess.MailHandle) sp.SetScheduler(scheduler.NewQueueScheduler(true)). SetExitWhenComplete(false). AddUrl(os.Args[1], "html"). // AddPipeline(pipeline.NewPipelineConsole()). SetThreadnum(3). Run() }
func TestQueueScheduler(t *testing.T) { var r *request.Request r = request.NewRequest("http://baidu.com", "html", "", "GET", "", nil, nil, nil, nil) fmt.Printf("%v\n", r) var s *scheduler.QueueScheduler s = scheduler.NewQueueScheduler(false) s.Push(r) var count int = s.Count() if count != 1 { t.Error("count error") } fmt.Println(count) var r1 *request.Request r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) // remove duplicate s = scheduler.NewQueueScheduler(true) r2 := request.NewRequest("http://qq.com", "html", "", "GET", "", nil, nil, nil, nil) s.Push(r) s.Push(r2) s.Push(r) count = s.Count() if count != 2 { t.Error("count error") } fmt.Println(count) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) }