// timeGet issues a POST to u through urlfetch and records how long the
// round trip took, along with the response status or any error.
func timeGet(u string) (rec *record) {
	rec = new(record)
	transport := urlfetch.Transport{
		Context:  context,
		Deadline: urlFetchTimeout,
	}
	req, err := http.NewRequest("POST", u, nil)
	if err != nil {
		rec.Err = err
		return rec
	}
	start := time.Now()
	resp, err := transport.RoundTrip(req)
	end := time.Now()
	if err != nil {
		rec.Err = err
		return rec
	}
	defer resp.Body.Close()
	rec.Duration = end.Sub(start).Seconds()
	rec.Status = resp.Status
	return rec
}
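// Editor's sketch of how timeGet might be driven: a handler that times a few
// endpoints and writes the results. It assumes the package-level `context`
// and the record fields (Err, Status, Duration) used above; the handler name
// and the URL list are illustrative, not taken from the original source.
func timingHandler(w http.ResponseWriter, r *http.Request) {
	context = appengine.NewContext(r)
	for _, u := range []string{"https://example.com/a", "https://example.com/b"} {
		rec := timeGet(u)
		if rec.Err != nil {
			fmt.Fprintf(w, "%s: error: %v\n", u, rec.Err)
			continue
		}
		fmt.Fprintf(w, "%s: %s in %.3fs\n", u, rec.Status, rec.Duration)
	}
}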
func handler(w http.ResponseWriter, r *http.Request) {
	context = appengine.NewContext(r)
	fr, err := copyRequest(r)
	if err != nil {
		context.Errorf("copyRequest: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	transport := urlfetch.Transport{
		Context: context,
		// Despite the name, Transport.Deadline is really a timeout and
		// not an absolute deadline as used in the net package. In
		// other words it is a time.Duration, not a time.Time.
		Deadline: urlFetchTimeout,
	}
	resp, err := transport.RoundTrip(fr)
	if err != nil {
		context.Errorf("RoundTrip: %s", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()
	for _, key := range reflectedHeaderFields {
		value := resp.Header.Get(key)
		if value != "" {
			w.Header().Add(key, value)
		}
	}
	w.WriteHeader(resp.StatusCode)
	n, err := io.Copy(w, resp.Body)
	if err != nil {
		context.Errorf("io.Copy after %d bytes: %s", n, err)
	}
}
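// Editor's sketch of the package-level wiring the proxy handler above relies
// on. The timeout value and the header list are illustrative assumptions;
// copyRequest (which rewrites the incoming request for the upstream host) is
// assumed to be defined elsewhere in the package.
var (
	context         appengine.Context
	urlFetchTimeout = 20 * time.Second
	// Response headers reflected back to the downstream client.
	reflectedHeaderFields = []string{"Content-Type", "Content-Length"}
)

func init() {
	http.HandleFunc("/", handler)
}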
// fetch retrieves the named source over urlfetch and stores the result in
// memcache, together with a "_fresh" timestamp used to rate-limit refetches.
func fetch(c appengine.Context, key string) error {
	s, ok := Sources[key]
	if !ok {
		return fmt.Errorf("%q not found", key)
	}
	c.Debugf("fetching %s data", key)
	transport := urlfetch.Transport{Context: c, Deadline: 60 * time.Second}
	req, err := http.NewRequest("GET", s.URL, strings.NewReader(""))
	if err != nil {
		return err
	}
	resp, err := transport.RoundTrip(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fetch: bad status %d for %s", resp.StatusCode, s.URL)
	}
	contents, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	item := &memcache.Item{
		Key:        key,
		Value:      contents,
		Expiration: s.Expiration,
	}
	if err := memcache.Set(c, item); err != nil {
		return err
	}
	// We keep the last updated time in memcache. It's not
	// updated atomically with the page, so it's only used to
	// limit the rate of fetches from the data servers. Don't use
	// it for display; use the data creation times in the data
	// instead. It doesn't matter to the user that we fetched a
	// weather forecast 3 minutes ago if the forecast is 48
	// minutes old.
	item = &memcache.Item{
		Key:   key + "_fresh",
		Value: []byte(strconv.FormatInt(time.Now().Unix(), 10)),
	}
	if err := memcache.Set(c, item); err != nil {
		return err
	}
	c.Infof("cached %d bytes of %s data", len(contents), key)
	return nil
}
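// Editor's sketch of a read path matching the cache layout written by fetch
// above: serve from memcache and refetch only when the "_fresh" timestamp is
// older than maxAge. The helper name and the maxAge parameter are assumptions
// for illustration.
func cachedGet(c appengine.Context, key string, maxAge time.Duration) ([]byte, error) {
	stale := true
	if fresh, err := memcache.Get(c, key+"_fresh"); err == nil {
		if ts, perr := strconv.ParseInt(string(fresh.Value), 10, 64); perr == nil {
			stale = time.Since(time.Unix(ts, 0)) > maxAge
		}
	}
	if stale {
		if err := fetch(c, key); err != nil {
			c.Errorf("refetch %s: %v", key, err)
		}
	}
	item, err := memcache.Get(c, key)
	if err != nil {
		return nil, err
	}
	return item.Value, nil
}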
func fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.NotifyEvent)
	errorResponse.SetId(ev.GetId())
	req, err := ev.ToRequest("")
	if nil != err {
		errorResponse.Code = event.ErrInvalidHttpRequest
		errorResponse.Reason = fmt.Sprintf("Invalid fetch url:%s with err:%v", ev.URL, err)
		return errorResponse
	}
	var t urlfetch.Transport
	t.Context = context
	t.Deadline, _ = time.ParseDuration("10s")
	t.AllowInvalidServerCertificate = true
	retryCount := 2
	for retryCount > 0 {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := event.NewHTTPResponseEvent(resp)
			// Read the body in chunks. A Read may return data together
			// with io.EOF, so append before inspecting the error.
			buffer := make([]byte, 8192)
			for {
				n, er := resp.Body.Read(buffer)
				res.Content = append(res.Content, buffer[0:n]...)
				if er == io.EOF {
					break
				}
				if er != nil {
					context.Errorf("Failed to read body for reason:%v", er)
					break
				}
			}
			if resp.ContentLength != int64(len(res.Content)) {
				context.Errorf("Failed to read body %d %d", resp.ContentLength, len(res.Content))
			}
			context.Errorf("%v %d %d", resp.Header.Get("Content-Length"), resp.ContentLength, len(res.Content))
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.URL, err)
		retryCount--
		if strings.EqualFold(req.Method, "GET") && strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			errorResponse.Code = event.ErrTooLargeResponse
			return errorResponse
		}
	}
	errorResponse.Code = event.ErrRemoteProxyTimeout
	errorResponse.Reason = fmt.Sprintf("Fetch timeout for url:%s", ev.URL)
	return errorResponse
}
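// Editor's note: the chunked read above can also be expressed with
// ioutil.ReadAll, which handles short reads and the data-plus-io.EOF case
// itself. A minimal sketch; the helper name is an assumption.
func readBody(context appengine.Context, resp *http.Response) []byte {
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		context.Errorf("Failed to read body for reason:%v", err)
	}
	if resp.ContentLength >= 0 && resp.ContentLength != int64(len(body)) {
		context.Errorf("Read %d bytes but Content-Length was %d", len(body), resp.ContentLength)
	}
	return body
}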
func fetch(url string, c appengine.Context) ([]byte, error) {
	transport := urlfetch.Transport{
		Context:                       c,
		Deadline:                      20 * time.Second,
		AllowInvalidServerCertificate: false,
	}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := transport.RoundTrip(req)
	if err != nil {
		return nil, err
	}
	// Close the body even if ReadAll fails part-way through.
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
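// Editor's sketch of a caller for the fetch helper above; the handler name
// and the target URL are illustrative.
func feedHandler(w http.ResponseWriter, r *http.Request) {
	c := appengine.NewContext(r)
	body, err := fetch("https://example.com/feed.json", c)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	w.Write(body)
}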
func Fetch(context appengine.Context, ev *event.HTTPRequestEvent) event.Event {
	errorResponse := new(event.HTTPResponseEvent)
	if Cfg.IsMaster == 1 {
		fillErrorResponse(errorResponse, "Proxy service is not enabled in the snova master node.")
		return errorResponse
	}
	if isInBlacklist(ev.GetHeader("Host")) {
		fillErrorResponse(errorResponse, "Current site is in blacklist.")
		return errorResponse
	}
	req := buildHTTPRequest(ev)
	if req == nil {
		errorResponse.Status = 400
		fillErrorResponse(errorResponse, "Invalid fetch url:"+ev.Url)
		return errorResponse
	}
	var t urlfetch.Transport
	t.Context = context
	t.Deadline, _ = time.ParseDuration("10s")
	t.AllowInvalidServerCertificate = true
	//t := &transport
	//t := &urlfetch.Transport{context, 0, true}
	retryCount := Cfg.RetryFetchCount
	for retryCount > 0 {
		resp, err := t.RoundTrip(req)
		if err == nil {
			res := buildHTTPResponseEvent(resp)
			if res.Status == 302 {
				rangeHeader := req.Header.Get("Range")
				if len(rangeHeader) > 0 {
					res.AddHeader("X-Range", rangeHeader)
				}
			}
			return res
		}
		context.Errorf("Failed to fetch URL[%s] for reason:%v", ev.Url, err)
		retryCount--
		if strings.EqualFold(req.Method, "GET") && strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			rangeLimit := Cfg.RangeFetchLimit
			rangestart := 0
			rangeheader := req.Header.Get("Range")
			if len(rangeheader) > 0 {
				rangestart, _ = util.ParseRangeHeaderValue(rangeheader)
			}
			req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", rangestart, rangeLimit-1))
		}
		if strings.Contains(err.Error(), "RESPONSE_TOO_LARGE") {
			time.Sleep(1 * time.Second)
			return Fetch(context, ev)
		}
	}
	errorResponse.Status = 408
	fillErrorResponse(errorResponse, "Fetch timeout for url:"+ev.Url)
	rangeHeader := req.Header.Get("Range")
	if len(rangeHeader) > 0 {
		errorResponse.SetHeader("X-Range", rangeHeader)
	}
	return errorResponse
}
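// Editor's sketch of the Range parsing that the RESPONSE_TOO_LARGE retry above
// depends on; the real util.ParseRangeHeaderValue may differ. It extracts the
// start offset from a header such as "bytes=1024-" or "bytes=1024-2047".
func parseRangeStart(h string) (int, error) {
	h = strings.TrimPrefix(h, "bytes=")
	dash := strings.Index(h, "-")
	if dash < 0 {
		return 0, fmt.Errorf("malformed Range value: %q", h)
	}
	return strconv.Atoi(h[:dash])
}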
// UrlGetter is a universal HTTP getter for App Engine and standalone Go
// programs. Previously the response was returned as well. Forgot why. Dropped it.
func UrlGetter(gaeReq *http.Request, options Options) ([]byte, Info, error) {

	options.LogLevel = 2

	var err error
	var inf Info = Info{}

	if options.LogLevel > 0 {
		if options.Req != nil {
			inf.Msg += fmt.Sprintf("orig req url: %#v\n", options.Req.URL.String())
		} else {
			inf.Msg += fmt.Sprintf("orig str url: %#v\n", options.URL)
		}
	}

	//
	// Either take the provided request
	// or build one from options.URL
	if options.Req == nil {
		ourl, err := URLFromString(options.URL) // Normalize
		if err != nil {
			return nil, inf, err
		}
		options.URL = ourl.String()
		options.Req, err = http.NewRequest("GET", options.URL, nil)
		if err != nil {
			return nil, inf, err
		}
	} else {
		if options.Req.URL.Scheme == "" {
			options.Req.URL.Scheme = "https"
		}
	}
	r := options.Req

	if len(options.KnownProtocol) > 1 {
		if strings.HasSuffix(options.KnownProtocol, ":") {
			options.KnownProtocol = strings.TrimSuffix(options.KnownProtocol, ":")
		}
		if options.KnownProtocol == "http" || options.KnownProtocol == "https" {
			r.URL.Scheme = options.KnownProtocol
			inf.Msg += fmt.Sprintf("Using known protocol %q\n", options.KnownProtocol)
		}
	}

	//
	// Unify appengine and plain http.Client
	client := &http.Client{}
	if gaeReq == nil {
		client.Timeout = time.Duration(5 * time.Second) // GAE does not allow
	} else {
		c := util_appengine.SafelyExtractGaeContext(gaeReq)
		if c != nil {
			ctxOld := oldAE.NewContext(gaeReq)
			client = oldFetch.Client(ctxOld)
			// this does not prevent urlfetch: SSL_CERTIFICATE_ERROR
			// it merely leads to err = "DEADLINE_EXCEEDED"
			tr := oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: true}
			// thus
			tr = oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: false}
			tr.Deadline = 20 * time.Second // only possible on aeOld
			client.Transport = &tr
			// client.Timeout = 20 * time.Second // also not in google.golang.org/appengine/urlfetch
		} else {
			return nil, inf, ErrNoContext
		}
		// appengine dev server => always fall back to http
		if c != nil && appengine.IsDevAppServer() && !options.ForceHTTPSEvenOnDevelopmentServer {
			r.URL.Scheme = "http"
		}
	}
	inf.URL = r.URL

	if options.RedirectHandling == 1 {
		client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
			if len(via) == 1 && req.URL.Path == via[0].URL.Path+"/" {
				// allow redirect from /gesundheit to /gesundheit/
				return nil
			}
			spath := "\n"
			for _, v := range via {
				spath += v.URL.Path + "\n"
			}
			spath += req.URL.Path + "\n"
			return fmt.Errorf("%v %v", MsgNoRdirects, spath)
		}
	}

	if options.LogLevel > 0 {
		inf.Msg += fmt.Sprintf("url standardized to %q %q %q \n", r.URL.Scheme, r.URL.Host, r.URL.RequestURI())
	}

	//
	// Respond to test.economist.com directly from memory
	if _, ok := TestData[r.URL.Host+r.URL.Path]; ok {
		return TestData[r.URL.Host+r.URL.Path], inf, nil
	}

	// The actual call
	// =============================
	resp, err := client.Do(r)

	// Swallow redirect errors
	if err != nil {
		if options.RedirectHandling == 1 {
			serr := err.Error()
			if strings.Contains(serr, MsgNoRdirects) {
				bts := []byte(serr)
				inf.Mod = time.Now().Add(-10 * time.Minute)
				return bts, inf, nil
			}
		}
	}

	isHTTPSProblem := false
	if err != nil {
		isHTTPSProblem = strings.Contains(err.Error(), "SSL_CERTIFICATE_ERROR") ||
			strings.Contains(err.Error(), "tls: oversized record received with length")
	}

	// Under narrow conditions => fall back to http
	if err != nil {
		if isHTTPSProblem && r.URL.Scheme == "https" && r.Method == "GET" {
			r.URL.Scheme = "http"
			var err2nd error
			resp, err2nd = client.Do(r) // while protocol http may go through
			// next obstacle might be - again - a redirect error:
			if err2nd != nil {
				if options.RedirectHandling == 1 {
					serr := err2nd.Error()
					if strings.Contains(serr, MsgNoRdirects) {
						bts := []byte(serr)
						inf.Mod = time.Now().Add(-10 * time.Minute)
						addFallBackSuccessInfo(options, &inf, r, err)
						return bts, inf, nil
					}
				}
				return nil, inf, fmt.Errorf("GET fallback to http failed with %v", err2nd)
			}
			addFallBackSuccessInfo(options, &inf, r, err)
			err = nil // CLEAR error
		}
	}

	//
	// Final error handler
	//
	if err != nil {
		hintAE := ""
		if isHTTPSProblem && r.URL.Scheme == "https" {
			// Not GET but POST:
			// We cannot do a fallback for a POST request - the r.Body.Reader is consumed
			// options.r.URL.Scheme = "http"
			// resp, err = client.Do(options.Req)
			return nil, inf, fmt.Errorf("Cannot do https requests. Possible reason: Dev server: %v", err)
		} else if strings.Contains(
			err.Error(),
			"net/http: Client Transport of type init.failingTransport doesn't support CancelRequest; Timeout not supported",
		) {
			hintAE = "\nDid you forget to submit the AE Request?\n"
		}
		return nil, inf, fmt.Errorf("request failed: %v - %v", err, hintAE)
	}

	//
	// We got a response, but it may be an
	// explicit bad response from the server
	if resp.StatusCode != http.StatusOK {
		if resp.StatusCode == http.StatusBadRequest || // 400
			resp.StatusCode == http.StatusNotFound || // 404
			false {
			dmp := ""
			for k, v := range resp.Header {
				dmp += fmt.Sprintf("key: %v - val %v\n", k, v)
			}
			dmp = ""
			dmp += stringspb.IndentedDump(r.URL)

			bts, errRd := ioutil.ReadAll(resp.Body)
			if errRd != nil {
				return nil, inf, fmt.Errorf("cannot read resp body: %v", errRd)
			}
			if len(bts) > 2*1024 {
				btsApdx := append([]byte(" ...omitted... "), bts[len(bts)-100:]...)
				bts = append(bts[2*1024:], btsApdx...)
			}
			defer resp.Body.Close()

			err2 := fmt.Errorf("resp %v: %v \n%v \n<pre>%s</pre>", resp.StatusCode, r.URL.String(), dmp, bts)

			if r.URL.Path == "" {
				r.URL.Path = "/"
			}
			var err2nd error
			resp, err2nd = client.Do(r)
			if err2nd != nil {
				return nil, inf, fmt.Errorf("again error %v \n%v", err2nd, err2)
			}
			if resp.StatusCode != http.StatusOK {
				inf.Status = resp.StatusCode
				return nil, inf, fmt.Errorf("again Status NotOK %v \n%v", resp.StatusCode, err2)
			}
			log.Printf("successful retry with '/' to %v after %v\n", r.URL.String(), err)
			err = nil // CLEAR error
			// return nil, inf, err2
		} else {
			return nil, inf, fmt.Errorf("bad http resp code: %v - %v", resp.StatusCode, r.URL.String())
		}
	}

	bts, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, inf, fmt.Errorf("cannot read resp body: %v", err)
	}
	defer resp.Body.Close()

	// time stamp
	var tlm time.Time // time last modified
	lm := resp.Header.Get("Last-Modified")
	if lm != "" {
		tlm, err = time.Parse(time.RFC1123, lm) // Last-Modified: Sat, 29 Aug 2015 21:15:39 GMT
		if err != nil {
			tlm, err = time.Parse(time.RFC1123Z, lm) // with numeric time zone
			if err != nil {
				var zeroTime time.Time
				tlm = zeroTime
			}
		}
	}
	inf.Mod = tlm
	// log.Printf(" hdr %v %v\n", lm, tlm.Format(time.ANSIC))

	return bts, inf, nil
}
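// Editor's sketch of calling UrlGetter. On App Engine the original
// *http.Request supplies the context; standalone programs pass nil and get a
// plain http.Client with a 5s timeout. Only Options fields referenced in the
// function body are set; the handler name and URL are illustrative.
func exampleHandler(w http.ResponseWriter, r *http.Request) {
	bts, inf, err := UrlGetter(r, Options{
		URL:              "www.example.com/news",
		RedirectHandling: 1,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	log.Printf("%d bytes, last modified %s\n%s", len(bts), inf.Mod.Format(time.RFC1123), inf.Msg)
	w.Write(bts)
}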