コード例 #1
0
ファイル: client.go プロジェクト: fanyang01/crawler
// revalidate sends a conditional GET for u to find out whether the cached
// response r (whose stored payload is body and whose cache metadata is cc)
// can still be used.
//
// The request carries If-None-Match when cc.ETag is non-empty, and
// If-Modified-Since derived from cc.LastModified (falling back to cc.Date)
// when one of them is set.
//
// Results:
//   - 304: the reply body is closed and rr is the response produced by
//     cache.Construct(r, reply, body); modified is false only if that
//     response's request URL equals u. rcc is parsed from rr.
//   - 2xx: rr is the fresh response with its body left open for the caller,
//     modified is true and rcc is parsed from rr.
//   - In both cases above the cache entry for u is removed when rcc is nil
//     or not cacheable.
//   - Any other status: the body is closed and err is a ResponseStatusError,
//     wrapped in RetryableError for 5xx and for 4xx other than 404.
//   - Failure to build or send the request: err is returned as-is.
func (c *StdClient) revalidate(
	u *url.URL, r *http.Response, body []byte, cc *cache.Control,
) (
	rr *http.Response, rcc *cache.Control, modified bool, err error,
) {
	modified = true

	// Do not discard the construction error: a nil request would panic on
	// the header writes below.
	var req *http.Request
	if req, err = http.NewRequest("GET", u.String(), nil); err != nil {
		return
	}
	if cc.ETag != "" {
		req.Header.Add("If-None-Match", cc.ETag)
	}
	t := cc.LastModified
	if t.IsZero() {
		t = cc.Date
	}
	// Skip the header entirely when no timestamp is known, and format in UTC
	// because http.TimeFormat hard-codes the "GMT" zone suffix.
	if !t.IsZero() {
		req.Header.Add("If-Modified-Since", t.UTC().Format(http.TimeFormat))
	}

	// NOTE(review): transport errors are returned unwrapped here, whereas Do
	// wraps its own transport errors in RetryableError — confirm whether that
	// difference is intentional.
	if rr, err = c.client.Do(req); err != nil {
		return
	}

	switch {
	case rr.StatusCode == http.StatusNotModified:
		// The 304 reply has no useful body; rebuild the response from cache.
		rr.Body.Close()
		rr = cache.Construct(r, rr, body)
		if rr.Request.URL.String() == u.String() {
			modified = false
		}
		fallthrough
	case 200 <= rr.StatusCode && rr.StatusCode < 300:
		rcc = cache.Parse(rr, time.Now())
		if rcc == nil || !rcc.IsCacheable() {
			c.cache.Remove(u)
		}
		return
	// 5xx and 4xx but 404 are retryable.
	case rr.StatusCode >= 500:
		fallthrough
	case rr.StatusCode >= 400 && rr.StatusCode != 404:
		rr.Body.Close()
		err = RetryableError{
			Err: ResponseStatusError(rr.StatusCode),
		}
		return
	default:
		rr.Body.Close()
		err = ResponseStatusError(rr.StatusCode)
		return
	}
}
コード例 #2
0
ファイル: client.go プロジェクト: fanyang01/crawler
// Do implements the Client interface.
//
// For GET requests with a cache configured, a cached entry is used when
// present: it is revalidated through c.revalidate if its cache control says
// so, otherwise it is served directly from the stored body. Every other
// request goes to the underlying HTTP client; only 2xx statuses are accepted,
// 5xx and 4xx other than 404 yield a RetryableError, and the remaining
// statuses yield a plain ResponseStatusError.
//
// When the resulting response is cacheable, either the cached header is
// updated (unmodified content) or the body is wrapped by the cache's reader.
func (c *StdClient) Do(req *Request) (r *Response, err error) {
	// Release a response that was built before an error was recorded.
	defer func() {
		if r != nil && err != nil {
			r.free()
		}
	}()

	var (
		resp      *http.Response
		ctrl      *cache.Control
		payload   []byte
		fetchedAt time.Time
		hit       bool
		changed   = true
	)

	if req.Method == "GET" && c.cache != nil {
		resp, payload, ctrl, hit = c.cache.Get(req.URL)
	}

	switch {
	case hit && ctrl.NeedValidate():
		// Cached entry must be confirmed with the origin server first.
		resp, ctrl, changed, err = c.revalidate(req.URL, resp, payload, ctrl)
		if err != nil {
			return
		}
		fetchedAt = time.Now()

	case hit:
		// Cached entry is usable as-is; replay the stored body.
		changed = false
		resp.Body = ioutil.NopCloser(bytes.NewReader(payload))
		fetchedAt = time.Now()

	default:
		resp, err = c.client.Do(req.Request)
		if err != nil {
			return nil, RetryableError{Err: err}
		}
		fetchedAt = time.Now()

		// Only status code 2xx is OK.
		if code := resp.StatusCode; code < 200 || code >= 300 {
			resp.Body.Close()
			// 5xx and 4xx but 404 are retryable.
			if code >= 400 && code != 404 {
				err = RetryableError{
					Err: ResponseStatusError(code),
				}
			} else {
				err = ResponseStatusError(code)
			}
			return
		}
		if c.cache != nil {
			ctrl = cache.Parse(resp, fetchedAt)
		}
	}

	r = NewResponse()
	r.init(req.URL, resp, fetchedAt, ctrl)

	if c.cache == nil || ctrl == nil || !ctrl.IsCacheable() {
		return
	}
	// Unmodified content: refreshing the cached header is enough.
	if !changed && c.cache.Update(req.URL, r.CacheControl, r.Header) {
		return
	}
	r.Body = c.cache.NewReader(r.NewURL, r.CacheControl, r.Response, r.Body)
	return
}