func (self *Downloader) Start(threadNum int) { self.requests = make([]*Request, threadNum) for i := 0; i < threadNum; i++ { request := NewRequest() self.requests[i] = request self.requests[i].SetDelegate(self) } for i := 0; i < threadNum; i++ { go func(index int, retryMaxCount int) { var urlStr string for { elem := self.scheduler.ShiftElementItem() if elem != nil { elemItem := elem.(common.ElementItem) urlStr = elemItem.UrlStr req, res, err := self.requests[index].Init(urlStr).Request() if err != nil { if elemItem.FaildCount < retryMaxCount { elemItem.FaildCount += 1 self.scheduler.AddElementItem(elemItem, true) } } else { params := make([]interface{}, 0) page := common.NewPage(req, res) self.process.Do(page) res.Body.Close() items, elems := page.GetAll() for _, v := range elems { self.scheduler.AddElementItem(v, false) } for _, v := range items { params = append(params, v) } self.pipeliner.CallMiddlewareMethod("GetItems", params) } Threads <- index } else { time.Sleep(self.sleepTime) } } }(i, retryMaxCount) } }
func (self *Request) Request() (*common.Page, error) { var err interface{} = nil body := &strings.Reader{} values := url.Values{} if len(self.params) > 0 { params := self.params for v, k := range params { values.Add(v, k) } body = strings.NewReader(values.Encode()) } req, reqError := http.NewRequest("GET", self.urlStr, body) var page *common.Page = nil if reqError == nil { requestError := sErr.RequestError{-1, false} self.delegate.CallMiddlewareMethod("SetRequest", []interface{}{req, &requestError}) if requestError.Exist { err = requestError } res, resError := self.client.Do(req) if resError == nil { responseError := sErr.ResponseError{false} page := common.NewPage(req, res) self.delegate.CallMiddlewareMethod("GetResponse", []interface{}{page, &responseError}) if responseError.Exist { err = responseError } else if res.StatusCode == 200 { return page, nil } else { err = sErr.RequestError{res.StatusCode, true} } } else { err = resError } } else { err = reqError } self.delegate.CallMiddlewareMethod("Error", []interface{}{self.client, err}) return page, err.(error) }