func main() { req := robot.NewRequest(wkSohuUrl, "html", "index", "GET", "", nil, nil, nil, nil) sohuSpider := robot.NewSpider(NewMyPageProcesser(), "Sohu"). AddRequest(req). SetSleepTime("rand", 500, 1000). SetThreadnum(2) for i := 1; i < maxWKSouhuLayer; i++ { url := fmt.Sprintf("http://yule.sohu.com/gossip/index_%d.shtml", 5301-i) // magic num req := robot.NewRequest(url, "html", "index", "GET", "", nil, nil, nil, nil) sohuSpider.AddRequest(req) } sohuSpider.Run() }
func TestQueueScheduler(t *testing.T) { var r *robot.Request r = robot.NewRequest("http://baidu.com", "html", "", "GET", "", nil, nil, nil, nil) fmt.Printf("%v\n", r) var s *QueueScheduler s = NewQueueScheduler(false) s.Push(r) var count int = s.Count() if count != 1 { t.Error("count error") } fmt.Println(count) var r1 *robot.Request r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) // remove duplicate s = NewQueueScheduler(true) r2 := robot.NewRequest("http://qq.com", "html", "", "GET", "", nil, nil, nil, nil) s.Push(r) s.Push(r2) s.Push(r) count = s.Count() if count != 2 { t.Error("count error") } fmt.Println(count) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) r1 = s.Poll() if r1 == nil { t.Error("poll error") } fmt.Printf("%v\n", r1) }
func addRequest(p *robot.Page, tag, url, cookie, content string) { req := robot.NewRequest(url, "json", tag, "GET", "", nil, nil, nil, content) p.AddTargetRequestWithParams(req) }