Beispiel #1
0
// timeGet sends one body-less POST request to u through an App Engine
// urlfetch transport and measures how long the round trip takes.
//
// The returned record is never nil. If building or sending the request
// fails, only rec.Err is populated. Otherwise rec.Duration holds the elapsed
// round-trip time in seconds and rec.Status the response status line.
func timeGet(u string) (rec *record) {
	rec = new(record)

	req, err := http.NewRequest("POST", u, nil)
	if err != nil {
		rec.Err = err
		return rec
	}

	fetcher := urlfetch.Transport{Context: context, Deadline: urlFetchTimeout}

	// Only the round trip itself is timed; the body is never read.
	began := time.Now()
	resp, err := fetcher.RoundTrip(req)
	elapsed := time.Since(began)
	if err != nil {
		rec.Err = err
		return rec
	}
	defer resp.Body.Close()

	rec.Duration, rec.Status = elapsed.Seconds(), resp.Status
	return rec
}
Beispiel #2
0
// handler proxies the incoming request: it derives an outbound request via
// copyRequest, sends it through an App Engine urlfetch transport and relays
// the upstream status code, the headers named in reflectedHeaderFields and
// the response body back to the client.
//
// Failures before any output is written are logged and answered with
// 500 Internal Server Error. A failure while streaming the body can only be
// logged, because the status line has already been sent.
func handler(w http.ResponseWriter, r *http.Request) {
	// NOTE(review): context is not declared in this function, so this writes
	// a variable shared outside it; concurrent requests would overwrite each
	// other's value. Kept as is because other code may read it - confirm.
	context = appengine.NewContext(r)
	fr, err := copyRequest(r)
	if err != nil {
		context.Errorf("copyRequest: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	transport := urlfetch.Transport{
		Context: context,
		// Despite the name, Transport.Deadline is really a timeout and
		// not an absolute deadline as used in the net package. In
		// other words it is a time.Duration, not a time.Time.
		Deadline: urlFetchTimeout,
	}
	resp, err := transport.RoundTrip(fr)
	if err != nil {
		context.Errorf("RoundTrip: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()

	// Reflect every value of each selected header. Header.Get would return
	// only the first one and silently drop repeated headers such as
	// multiple Set-Cookie lines.
	for _, key := range reflectedHeaderFields {
		for _, value := range resp.Header[http.CanonicalHeaderKey(key)] {
			if value != "" {
				w.Header().Add(key, value)
			}
		}
	}
	w.WriteHeader(resp.StatusCode)
	n, err := io.Copy(w, resp.Body)
	if err != nil {
		context.Errorf("io.Copy after %d bytes: %s", n, err)
	}
}
Beispiel #3
0
// fetch downloads the data source registered under key in Sources and stores
// the raw response body in memcache under that same key, using the source's
// configured expiration. A second memcache entry, key+"_fresh", records the
// Unix time of the download.
//
// It returns an error when the key is unknown, the request cannot be built
// or sent, the server answers with anything other than 200 OK, the body
// cannot be read, or either memcache write fails.
func fetch(c appengine.Context, key string) error {
	src, found := Sources[key]
	if !found {
		return fmt.Errorf("%q not found", key)
	}

	c.Debugf("fetching %s data", key)

	req, err := http.NewRequest("GET", src.URL, strings.NewReader(""))
	if err != nil {
		return err
	}
	fetcher := urlfetch.Transport{Context: c, Deadline: 60 * time.Second}
	resp, err := fetcher.RoundTrip(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fetch: bad status %d for %s", resp.StatusCode, src.URL)
	}

	payload, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	err = memcache.Set(c, &memcache.Item{
		Key:        key,
		Value:      payload,
		Expiration: src.Expiration,
	})
	if err != nil {
		return err
	}

	// The fetch timestamp lives in its own memcache entry. It is not written
	// atomically with the page, so it is only good for rate-limiting fetches
	// from the data servers. For display, use the creation times inside the
	// data: the user does not care that a forecast was fetched 3 minutes ago
	// if the forecast itself is 48 minutes old.
	fetchedAt := strconv.FormatInt(time.Now().Unix(), 10)
	err = memcache.Set(c, &memcache.Item{
		Key:   key + "_fresh",
		Value: []byte(fetchedAt),
	})
	if err != nil {
		return err
	}

	c.Infof("cached %d bytes of %s data", len(payload), key)
	return nil
}
Beispiel #4
0
// fetch executes the HTTP request described by ev through an App Engine
// urlfetch transport and returns the outcome as an event.
//
// The request is attempted at most twice, with a 10 second deadline per
// attempt and server certificate validation disabled. On success the result
// is the event built by event.NewHTTPResponseEvent with the response body
// appended to its Content. On failure the result is a NotifyEvent carrying
// the id of ev and one of these codes:
//   - event.ErrInvalidHttpRequest: ev could not be converted to a request
//   - event.ErrTooLargeResponse: a GET failed with RESPONSE_TOO_LARGE
//   - event.ErrRemoteProxyTimeout: every attempt failed
func fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.NotifyEvent)
	errorResponse.SetId(ev.GetId())
	req, err := ev.ToRequest("")

	if nil != err {
		errorResponse.Code = event.ErrInvalidHttpRequest
		errorResponse.Reason = fmt.Sprintf("Invalid fetch url:%s with err:%v", ev.URL, err)
		return errorResponse
	}
	t := urlfetch.Transport{
		Context:                       context,
		Deadline:                      10 * time.Second,
		AllowInvalidServerCertificate: true,
	}
	for retryCount := 2; retryCount > 0; retryCount-- {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := event.NewHTTPResponseEvent(resp)
			if nil != resp.Body {
				// ReadAll keeps bytes delivered together with io.EOF and
				// does not report a normal end of stream as an error.
				body, er := ioutil.ReadAll(resp.Body)
				resp.Body.Close()
				if nil != er {
					context.Errorf("Failed to read body for reason:%v", er)
				}
				// Whatever was read before a failure is still passed on.
				res.Content = append(res.Content, body...)
			}
			// A negative ContentLength means the length is unknown, so only
			// a declared length can be compared with what was received.
			if resp.ContentLength >= 0 && resp.ContentLength != int64(len(res.Content)) {
				context.Errorf("Failed to read body %d %d", resp.ContentLength, len(res.Content))
			}
			// Diagnostic only; it must not show up as an error on success.
			context.Debugf("%v %d %d", resp.Header.Get("Content-Length"), resp.ContentLength, len(res.Content))
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.URL, err)
		// Retrying an oversized GET cannot succeed, so give up immediately.
		if strings.EqualFold(req.Method, "GET") && strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			errorResponse.Code = event.ErrTooLargeResponse
			return errorResponse
		}
	}
	errorResponse.Code = event.ErrRemoteProxyTimeout
	errorResponse.Reason = fmt.Sprintf("Fetch timeout for url:%s", ev.URL)
	return errorResponse
}
Beispiel #5
0
// fetch performs a GET request for url through an App Engine urlfetch
// transport with a 20 second deadline and certificate validation enabled,
// and returns the complete response body.
//
// The response status code is not inspected; the body of a non-2xx answer is
// returned like any other. An error is returned when the request cannot be
// built or sent, or when reading the body fails.
func fetch(url string, c appengine.Context) ([]byte, error) {
	transport := urlfetch.Transport{
		Context:                       c,
		Deadline:                      20 * time.Second,
		AllowInvalidServerCertificate: false,
	}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := transport.RoundTrip(req)
	if err != nil {
		return nil, err
	}
	// Deferred so the body is also released when ReadAll fails.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
Beispiel #6
0
// Fetch executes the HTTP request described by ev through an App Engine
// urlfetch transport and returns the outcome as an HTTP response event.
//
// The request is refused up front when Cfg.IsMaster is 1 or when the Host
// header of ev is blacklisted, and answered with status 400 when no request
// can be built from ev. Otherwise up to Cfg.RetryFetchCount attempts are
// made, each with a 10 second deadline and certificate validation disabled.
//
// When an attempt fails with RESPONSE_TOO_LARGE, a GET request is narrowed
// with a Range header spanning Cfg.RangeFetchLimit bytes and retried after a
// one second pause. A successful 302 answer to a ranged request echoes the
// range in an X-Range header. If every attempt fails the result has status
// 408 and, for ranged requests, the same X-Range header.
func Fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.HTTPResponseEvent)
	if Cfg.IsMaster == 1 {
		fillErrorResponse(errorResponse, "Proxy service is no enable in snova master node.")
		return errorResponse
	}
	if isInBlacklist(ev.GetHeader("Host")) {
		fillErrorResponse(errorResponse, "Current site is in blacklist.")
		return errorResponse
	}
	req := buildHTTPRequest(ev)

	if req == nil {
		errorResponse.Status = 400
		fillErrorResponse(errorResponse, "Invalid fetch url:"+ev.Url)
		return errorResponse
	}
	var t urlfetch.Transport
	t.Context = context
	t.Deadline = 10 * time.Second
	t.AllowInvalidServerCertificate = true

	retryCount := Cfg.RetryFetchCount
	// tooLargeRetried ensures the first oversized response always gets one
	// follow-up attempt, even when the retry budget is already spent.
	tooLargeRetried := false
	for retryCount > 0 {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := buildHTTPResponseEvent(resp)
			if res.Status == 302 {
				rangeHeader := req.Header.Get("Range")
				if len(rangeHeader) > 0 {
					res.AddHeader("X-Range", rangeHeader)
				}
			}
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.Url, err)
		retryCount--
		if strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			if strings.EqualFold(req.Method, "GET") {
				rangestart := 0
				if rangeheader := req.Header.Get("Range"); len(rangeheader) > 0 {
					rangestart, _ = util.ParseRangeHeaderValue(rangeheader)
				}
				// The end offset is relative to the start; otherwise a
				// start beyond the limit would yield an invalid range.
				rangeend := int64(rangestart) + int64(Cfg.RangeFetchLimit) - 1
				req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", rangestart, rangeend))
			}
			if !tooLargeRetried {
				tooLargeRetried = true
				retryCount++
			}
			// Retry in this loop on the same request. Calling Fetch again
			// would rebuild the request from ev, discarding the Range
			// header just set, and could recurse without bound.
			time.Sleep(1 * time.Second)
		}
	}
	errorResponse.Status = 408
	fillErrorResponse(errorResponse, "Fetch timeout for url:"+ev.Url)
	rangeHeader := req.Header.Get("Range")
	if len(rangeHeader) > 0 {
		errorResponse.SetHeader("X-Range", rangeHeader)
	}
	return errorResponse
}
Beispiel #7
0
// UrlGetter is a universal HTTP getter for App Engine and standalone Go
// programs. It returns the response body, an Info describing the request
// (effective URL, log messages, modification time) and an error.
//
// The request is either options.Req or a GET built from options.URL. When
// gaeReq is nil a plain http.Client with a 5 second timeout is used;
// otherwise an App Engine urlfetch client with a 20 second deadline is
// derived from gaeReq, and ErrNoContext is returned if no context can be
// extracted from it.
//
// Special handling, in order:
//   - options.KnownProtocol ("http"/"https") overrides the URL scheme.
//   - With options.RedirectHandling == 1 only a trailing-slash redirect is
//     followed; any other redirect is reported as the returned body with a
//     nil error.
//   - URLs present in TestData are answered from memory.
//   - A GET over https failing with a certificate/TLS error is retried once
//     over http.
//   - A 400 or 404 answer is retried once, with an empty path replaced by
//     "/"; any other non-200 status is an error.
//
// Info.Mod is taken from the Last-Modified response header when it parses.
// Previously the response was returned as well; that was dropped.
func UrlGetter(gaeReq *http.Request, options Options) (
	[]byte, Info, error,
) {

	// NOTE(review): this overrides whatever LogLevel the caller supplied, so
	// the logging branches below always run. Looks like a debugging
	// leftover - confirm before removing.
	options.LogLevel = 2

	var err error
	var inf Info = Info{}

	if options.LogLevel > 0 {
		if options.Req != nil {
			inf.Msg += fmt.Sprintf("orig req url: %#v\n", options.Req.URL.String())
		} else {
			inf.Msg += fmt.Sprintf("orig str url: %#v\n", options.URL)
		}
	}

	//
	// Either take the provided request
	// or build one from options.URL
	if options.Req == nil {
		ourl, err := URLFromString(options.URL) // Normalize
		if err != nil {
			return nil, inf, err
		}
		options.URL = ourl.String()
		options.Req, err = http.NewRequest("GET", options.URL, nil)
		if err != nil {
			return nil, inf, err
		}
	} else {
		if options.Req.URL.Scheme == "" {
			options.Req.URL.Scheme = "https"
		}
	}
	r := options.Req

	if len(options.KnownProtocol) > 1 {
		if strings.HasSuffix(options.KnownProtocol, ":") {
			options.KnownProtocol = strings.TrimSuffix(options.KnownProtocol, ":")
		}
		if options.KnownProtocol == "http" || options.KnownProtocol == "https" {
			r.URL.Scheme = options.KnownProtocol
			inf.Msg += fmt.Sprintf("Using known protocol %q\n", options.KnownProtocol)
		}
	}

	//
	// Unify appengine and plain http.Client
	client := &http.Client{}
	if gaeReq == nil {
		client.Timeout = time.Duration(5 * time.Second) // GAE does not allow
	} else {
		c := util_appengine.SafelyExtractGaeContext(gaeReq)
		if c == nil {
			return nil, inf, ErrNoContext
		}

		ctxOld := oldAE.NewContext(gaeReq)
		client = oldFetch.Client(ctxOld)

		// AllowInvalidServerCertificate: true does not prevent
		// urlfetch: SSL_CERTIFICATE_ERROR; it merely leads to
		// err = "DEADLINE_EXCEEDED". Thus validation stays enabled.
		tr := oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: false}

		tr.Deadline = 20 * time.Second // only possible on aeOld

		client.Transport = &tr
		// client.Timeout = 20 * time.Second // also not in google.golang.org/appengine/urlfetch

		// appengine dev server => always fallback to http
		if appengine.IsDevAppServer() && !options.ForceHTTPSEvenOnDevelopmentServer {
			r.URL.Scheme = "http"
		}
	}

	inf.URL = r.URL

	if options.RedirectHandling == 1 {
		client.CheckRedirect = func(req *http.Request, via []*http.Request) error {

			if len(via) == 1 && req.URL.Path == via[0].URL.Path+"/" {
				// allow redirect from /gesundheit to /gesundheit/
				return nil
			}

			spath := "\n"
			for _, v := range via {
				spath += v.URL.Path + "\n"
			}
			spath += req.URL.Path + "\n"
			return fmt.Errorf("%v %v", MsgNoRdirects, spath)
		}
	}

	if options.LogLevel > 0 {
		inf.Msg += fmt.Sprintf("url standardized to %q  %q %q \n", r.URL.Scheme, r.URL.Host, r.URL.RequestURI())
	}

	//
	//
	// Respond to test.economist.com directly from memory
	if testBts, ok := TestData[r.URL.Host+r.URL.Path]; ok {
		return testBts, inf, nil
	}

	// The actual call
	// =============================

	resp, err := client.Do(r)

	// Swallow redirect errors
	if err != nil {
		if options.RedirectHandling == 1 {
			serr := err.Error()
			if strings.Contains(serr, MsgNoRdirects) {
				bts := []byte(serr)
				inf.Mod = time.Now().Add(-10 * time.Minute)
				return bts, inf, nil
			}
		}
	}

	isHTTPSProblem := false
	if err != nil {
		isHTTPSProblem = strings.Contains(err.Error(), "SSL_CERTIFICATE_ERROR") ||
			strings.Contains(err.Error(), "tls: oversized record received with length")
	}

	// Under narrow conditions => fallback to http
	if err != nil {
		if isHTTPSProblem && r.URL.Scheme == "https" && r.Method == "GET" {
			r.URL.Scheme = "http"
			var err2nd error
			resp, err2nd = client.Do(r)
			// while protocol http may go through
			// next obstacle might be - again - a redirect error:
			if err2nd != nil {
				if options.RedirectHandling == 1 {
					serr := err2nd.Error()
					if strings.Contains(serr, MsgNoRdirects) {
						bts := []byte(serr)
						inf.Mod = time.Now().Add(-10 * time.Minute)
						addFallBackSuccessInfo(options, &inf, r, err)
						return bts, inf, nil
					}
				}

				return nil, inf, fmt.Errorf("GET fallback to http failed with %v", err2nd)
			}
			addFallBackSuccessInfo(options, &inf, r, err)
			err = nil // CLEAR error
		}
	}

	//
	// Final error handler
	//
	if err != nil {
		hintAE := ""
		if isHTTPSProblem && r.URL.Scheme == "https" {
			// Not GET but POST:
			// We cannot do a fallback for a post request - the r.Body.Reader is consumed
			// options.r.URL.Scheme = "http"
			// resp, err = client.Do(options.Req)
			return nil, inf, fmt.Errorf("Cannot do https requests. Possible reason: Dev server: %v", err)
		} else if strings.Contains(
			err.Error(),
			"net/http: Client Transport of type init.failingTransport doesn't support CancelRequest; Timeout not supported",
		) {
			hintAE = "\nDid you forget to submit the AE Request?\n"
		}
		return nil, inf, fmt.Errorf("request failed: %v - %v", err, hintAE)
	}

	//
	// We got a response, but
	// an explicit bad response from the server
	if resp.StatusCode != http.StatusOK {

		if resp.StatusCode == http.StatusBadRequest || // 400
			resp.StatusCode == http.StatusNotFound { // 404
			dmp := stringspb.IndentedDump(r.URL)

			bts, errRd := ioutil.ReadAll(resp.Body)
			// Close right away, also on a read error; resp is replaced by
			// the retry below.
			resp.Body.Close()
			if errRd != nil {
				return nil, inf, fmt.Errorf("cannot read resp body: %v", errRd)
			}
			if len(bts) > 2*1024 {
				// Keep the first 2 KiB and the last 100 bytes. The tail is
				// copied into its own slice before bts is cut.
				btsApdx := append([]byte(" ...omitted... "), bts[len(bts)-100:]...)
				bts = append(bts[:2*1024], btsApdx...)
			}

			err2 := fmt.Errorf("resp %v: %v \n%v \n<pre>%s</pre>", resp.StatusCode, r.URL.String(), dmp, bts)

			if r.URL.Path == "" {
				r.URL.Path = "/"
			}
			var err2nd error
			resp, err2nd = client.Do(r)
			if err2nd != nil {
				return nil, inf, fmt.Errorf("again error %v \n%v", err2nd, err2)
			}
			if resp.StatusCode != http.StatusOK {
				resp.Body.Close()
				inf.Status = resp.StatusCode
				return nil, inf, fmt.Errorf("again Status NotOK %v \n%v", resp.StatusCode, err2)
			}
			// NOTE(review): err is always nil here because the final error
			// handler above returned otherwise; err2 may have been meant.
			log.Printf("successful retry with '/' to %v after %v\n", r.URL.String(), err)
			err = nil // CLEAR error

			// return nil, inf, err2

		} else {
			resp.Body.Close()
			return nil, inf, fmt.Errorf("bad http resp code: %v - %v", resp.StatusCode, r.URL.String())
		}
	}

	// Deferred before reading so the body is released on a read error too.
	defer resp.Body.Close()
	bts, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, inf, fmt.Errorf("cannot read resp body: %v", err)
	}

	// time stamp
	var tlm time.Time // time last modified
	lm := resp.Header.Get("Last-Modified")
	if lm != "" {
		tlm, err = time.Parse(time.RFC1123, lm) // Last-Modified: Sat, 29 Aug 2015 21:15:39 GMT
		if err != nil {
			tlm, err = time.Parse(time.RFC1123Z, lm) // with numeric time zone
			if err != nil {
				// Unparseable header: report the zero time, not an error.
				var zeroTime time.Time
				tlm = zeroTime
			}
		}
	}
	inf.Mod = tlm
	// log.Printf("    hdr  %v %v\n", lm, tlm.Format(time.ANSIC))

	return bts, inf, nil

}