Example #1
// timeGet issues a POST request to u through App Engine's urlfetch
// transport and records the round-trip duration and response status.
func timeGet(u string) (rec *record) {
	rec = new(record)

	transport := urlfetch.Transport{
		Context:  context,
		Deadline: urlFetchTimeout,
	}
	req, err := http.NewRequest("POST", u, nil)
	if err != nil {
		rec.Err = err
		return rec
	}
	start := time.Now()
	resp, err := transport.RoundTrip(req)
	end := time.Now()
	if err != nil {
		rec.Err = err
		return rec
	}
	defer resp.Body.Close()

	rec.Duration = end.Sub(start).Seconds()
	rec.Status = resp.Status

	return rec
}
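Example #1 relies on a package-level context, a urlFetchTimeout value, and a record type that are not shown. Below is a minimal sketch of what those supporting declarations might look like; the identifiers come from the snippet, but the package name, field set, and 30-second timeout are assumptions.

package app // hypothetical package name

import (
	"time"

	"appengine"
)

// Assumed supporting declarations for Example #1; the original project
// may define these differently.
var context appengine.Context // set per request before timeGet is called

const urlFetchTimeout = 30 * time.Second // urlfetch deadline; a time.Duration, not a time.Time

// record holds the outcome of one timed fetch.
type record struct {
	Duration float64 // round-trip time in seconds
	Status   string  // HTTP status line, e.g. "200 OK"
	Err      error   // non-nil if the request could not be built or sent
}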
Example #2
// handler proxies the incoming request through App Engine's urlfetch
// transport and reflects selected response headers and the body back
// to the client.
func handler(w http.ResponseWriter, r *http.Request) {
	context = appengine.NewContext(r)
	fr, err := copyRequest(r)
	if err != nil {
		context.Errorf("copyRequest: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	transport := urlfetch.Transport{
		Context: context,
		// Despite the name, Transport.Deadline is really a timeout and
		// not an absolute deadline as used in the net package. In
		// other words it is a time.Duration, not a time.Time.
		Deadline: urlFetchTimeout,
	}
	resp, err := transport.RoundTrip(fr)
	if err != nil {
		context.Errorf("RoundTrip: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()
	for _, key := range reflectedHeaderFields {
		value := resp.Header.Get(key)
		if value != "" {
			w.Header().Add(key, value)
		}
	}
	w.WriteHeader(resp.StatusCode)
	n, err := io.Copy(w, resp.Body)
	if err != nil {
		context.Errorf("io.Copy after %d bytes: %s", n, err)
	}
}
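Example #2 calls a copyRequest helper and reads a reflectedHeaderFields list that are defined elsewhere in the original project. The sketch below is only a guess at such a helper: the signature is taken from the call site, while destinationHost and the sample header list are made-up placeholders.

package app // hypothetical package name

import "net/http"

// Placeholder configuration for the sketch; the original project supplies
// its own destination and header list.
var (
	destinationHost       = "backend.example.com"
	reflectedHeaderFields = []string{"Content-Type", "X-Session-Id"}
)

// copyRequest builds the outbound *http.Request that handler forwards
// through urlfetch. Hypothetical sketch: the original helper may rewrite
// the URL, filter headers, or handle the body differently.
func copyRequest(r *http.Request) (*http.Request, error) {
	u := *r.URL
	u.Scheme = "https"
	u.Host = destinationHost
	fr, err := http.NewRequest(r.Method, u.String(), r.Body)
	if err != nil {
		return nil, err
	}
	// Forward only the headers the proxy is willing to reflect.
	for _, key := range reflectedHeaderFields {
		if value := r.Header.Get(key); value != "" {
			fr.Header.Add(key, value)
		}
	}
	return fr, nil
}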
Example #3
// fetch downloads the data source registered under key, caches the body
// in memcache, and stores a separate freshness timestamp alongside it.
func fetch(c appengine.Context, key string) error {
	s, ok := Sources[key]
	if !ok {
		return fmt.Errorf("%q not found", key)
	}

	c.Debugf("fetching %s data", key)
	transport := urlfetch.Transport{Context: c, Deadline: 60 * time.Second}
	req, err := http.NewRequest("GET", s.URL, strings.NewReader(""))
	if err != nil {
		return err
	}
	resp, err := transport.RoundTrip(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fetch: bad status %d for %s", resp.StatusCode, s.URL)
	}
	contents, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	item := &memcache.Item{
		Key:        key,
		Value:      contents,
		Expiration: s.Expiration,
	}
	if err := memcache.Set(c, item); err != nil {
		return err
	}

	// We keep the last updated time in memcache.  It's not
	// updated atomically with the page, so it's only used to
	// limit the rate of fetches from the data servers.  Don't use
	// it for display; use the data creation times in the data
	// instead.  It doesn't matter to the user that we fetched a
	// weather forecast 3 minutes ago if the forecast is 48
	// minutes old.
	item = &memcache.Item{
		Key:   key + "_fresh",
		Value: []byte(strconv.FormatInt(time.Now().Unix(), 10)),
	}
	if err := memcache.Set(c, item); err != nil {
		return err
	}

	c.Infof("cached %d bytes of %s data", len(contents), key)
	return nil
}
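Example #3 reads from a package-level Sources map whose entries carry a URL and a memcache expiration. A minimal sketch of what those declarations might look like; the Source type name, its fields, and the sample entry are assumptions.

package app // hypothetical package name

import "time"

// Source describes one upstream data feed; assumed layout based on how
// Example #3 uses s.URL and s.Expiration.
type Source struct {
	URL        string        // where to fetch the data from
	Expiration time.Duration // how long memcache should keep the cached body
}

// Sources maps a cache key to its data source. The entry below is
// illustrative, not from the original project.
var Sources = map[string]*Source{
	"weather": {
		URL:        "https://example.com/weather.json",
		Expiration: 10 * time.Minute,
	},
}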
Example #4
// fetch executes the HTTP request described by ev through App Engine's
// urlfetch transport, making up to two attempts, and returns either an
// HTTP response event or a NotifyEvent describing the failure.
func fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.NotifyEvent)
	errorResponse.SetId(ev.GetId())
	req, err := ev.ToRequest("")
	if err != nil {
		errorResponse.Code = event.ErrInvalidHttpRequest
		errorResponse.Reason = fmt.Sprintf("Invalid fetch url %s: %v", ev.URL, err)
		return errorResponse
	}
	t := urlfetch.Transport{
		Context:                       context,
		Deadline:                      10 * time.Second,
		AllowInvalidServerCertificate: true,
	}
	retryCount := 2
	for retryCount > 0 {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := event.NewHTTPResponseEvent(resp)
			if resp.Body != nil {
				defer resp.Body.Close()
				buffer := make([]byte, 8192)
				for {
					n, er := resp.Body.Read(buffer)
					if n > 0 {
						// Keep whatever was read, including the bytes returned
						// together with io.EOF on the final read.
						res.Content = append(res.Content, buffer[:n]...)
					}
					if er == io.EOF {
						break
					}
					if er != nil {
						context.Errorf("Failed to read body for reason:%v", er)
						break
					}
				}
			}
			if resp.ContentLength != int64(len(res.Content)) {
				context.Errorf("Body length mismatch: Content-Length %d, read %d bytes", resp.ContentLength, len(res.Content))
			}
			context.Errorf("%v %d %d", resp.Header.Get("Content-Length"), resp.ContentLength, len(res.Content))
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.URL, err)
		retryCount--
		if strings.EqualFold(req.Method, "GET") && strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			errorResponse.Code = event.ErrTooLargeResponse
			return errorResponse
		}
	}
	errorResponse.Code = event.ErrRemoteProxyTimeout
	errorResponse.Reason = fmt.Sprintf("Fetch timeout for url:%s", ev.URL)
	return errorResponse
}
Example #5
// fetch performs a GET request for url through App Engine's urlfetch
// transport with a 20-second deadline and returns the response body.
func fetch(url string, c appengine.Context) ([]byte, error) {
	transport := urlfetch.Transport{
		Context:                       c,
		Deadline:                      20 * time.Second,
		AllowInvalidServerCertificate: false,
	}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := transport.RoundTrip(req)
	if err != nil {
		return nil, err
	}
	// Ensure the body is closed even if ReadAll fails.
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
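A possible caller for Example #5, shown only to illustrate where the appengine.Context comes from; the handler name and URL are illustrative, not part of the original code.

package app // hypothetical package name

import (
	"net/http"

	"appengine"
)

// exampleHandler builds a context from the incoming request, fetches a
// remote document through Example #5's fetch, and serves the body.
func exampleHandler(w http.ResponseWriter, r *http.Request) {
	c := appengine.NewContext(r)
	body, err := fetch("https://example.com/data.json", c)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Write(body)
}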
Example #6
// Fetch executes the HTTP request described by ev through App Engine's
// urlfetch transport, retrying on failure and falling back to ranged
// GET requests when the upstream response is too large.
func Fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.HTTPResponseEvent)
	if Cfg.IsMaster == 1 {
		fillErrorResponse(errorResponse, "Proxy service is no enable in snova master node.")
		return errorResponse
	}
	if isInBlacklist(ev.GetHeader("Host")) {
		fillErrorResponse(errorResponse, "Current site is in blacklist.")
		return errorResponse
	}
	req := buildHTTPRequest(ev)

	if req == nil {
		errorResponse.Status = 400
		fillErrorResponse(errorResponse, "Invalid fetch url:"+ev.Url)
		return errorResponse
	}
	t := urlfetch.Transport{
		Context:                       context,
		Deadline:                      10 * time.Second,
		AllowInvalidServerCertificate: true,
	}
	retryCount := Cfg.RetryFetchCount
	for retryCount > 0 {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := buildHTTPResponseEvent(resp)
			if res.Status == 302 {
				rangeHeader := req.Header.Get("Range")
				if len(rangeHeader) > 0 {
					res.AddHeader("X-Range", rangeHeader)
				}
			}
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.Url, err)
		retryCount--
		if strings.EqualFold(req.Method, "GET") && strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			// Retry the GET as a ranged request within the configured limit.
			rangeLimit := Cfg.RangeFetchLimit
			rangestart := 0
			rangeheader := req.Header.Get("Range")
			if len(rangeheader) > 0 {
				rangestart, _ = util.ParseRangeHeaderValue(rangeheader)
			}
			req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", rangestart, rangeLimit-1))
		} else if strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			// Non-GET requests cannot be retried with a Range header;
			// back off briefly and start the whole fetch over.
			time.Sleep(1 * time.Second)
			return Fetch(context, ev)
		}
	}
	errorResponse.Status = 408
	fillErrorResponse(errorResponse, "Fetch timeout for url:"+ev.Url)
	rangeHeader := req.Header.Get("Range")
	if len(rangeHeader) > 0 {
		errorResponse.SetHeader("X-Range", rangeHeader)
	}
	return errorResponse
}