// GetFeed gets a URL and returns a http.Response. // Sets a reasonable timeout on the connection and read from the server. // Users will need to Close() the resposne.Body or risk leaking connections. func GetFeed(url string, client *http.Client) (*http.Response, error) { logrus.Infof("Crawling %v", url) // Defaults to 1 second for connect and read connectTimeout := (5 * time.Second) readWriteTimeout := (15 * time.Second) if client == nil { client = httpclient.NewTimeoutClient(connectTimeout, readWriteTimeout) } req, err := http.NewRequest("GET", url, nil) if err != nil { logrus.Errorf("Error creating request: %v", err) return nil, err } req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") r, err := client.Do(req) if err != nil { logrus.Infof("Error getting %s: %s", url, err) return r, err } if r.StatusCode != http.StatusOK { err = fmt.Errorf("feed %s returned a non 200 status code: %s", url, r.Status) logrus.Info(err) return r, err } return r, nil }
func getURL(url string) ([]byte, error) { // Defaults to 1 second for connect and read connectTimeout := (5 * time.Second) readWriteTimeout := (15 * time.Second) client := httpclient.NewTimeoutClient(connectTimeout, readWriteTimeout) resp, err := client.Get(url) if err != nil { logrus.Infof("Error getting %s: %s", url, err) return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { err = fmt.Errorf("feed %s returned a non 200 status code: %s", url, resp.Status) logrus.Error(err) return nil, err } var b []byte if resp.ContentLength > 0 { b = make([]byte, resp.ContentLength) _, err := io.ReadFull(resp.Body, b) if err != nil { return nil, fmt.Errorf("error reading response for %s: %s", url, err) } } else { b, err = ioutil.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("error reading response for %s: %s", url, err) } } return b, nil }