//自定义函数 func New(taskname string) *Spider { mlog.StraceInst().Open() ap := &Spider{taskname: taskname} // init filelog. ap.CloseFileLog() ap.exitWhenComplete = true ap.sleeptype = "fixed" ap.startSleeptime = 0 // init spider if ap.pScheduler == nil { ap.SetScheduler(scheduler.NewQueueScheduler(false)) } if ap.pDownloader == nil { ap.SetDownloader(downloader.NewHttpDownloader()) } mlog.StraceInst().Println("** start spider **") ap.pPiplelines = make([]pipeline.Pipeline, 0) return ap }
func TestQueueScheduler(t *testing.T) { var r *request.Request r = request.NewRequest("http://baidu.com", "html", "", "GET", "", nil, nil, nil, nil) fmt.Printf("%v\n", r) var s *scheduler.QueueScheduler s = scheduler.NewQueueScheduler(false) s.Push(r) var count int = s.Count() if count != 1 { t.Error("count error") } fmt.Println(count) var r1 *request.Request r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) // remove duplicate s = scheduler.NewQueueScheduler(true) r2 := request.NewRequest("http://qq.com", "html", "", "GET", "", nil, nil, nil, nil) s.Push(r) s.Push(r2) s.Push(r) count = s.Count() if count != 2 { t.Error("count error") } fmt.Println(count) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) }
func (this *Spider) close() { this.SetScheduler(scheduler.NewQueueScheduler(false)) this.SetDownloader(downloader.NewHttpDownloader()) this.pPiplelines = make([]pipeline.Pipeline, 0) this.exitWhenComplete = true }