Beispiel #1
0
// GetFeed issues a GET request for url and returns the resulting http.Response.
//
// If client is nil, a client is built for this call with
// httpclient.NewTimeoutClient using a 5-second connect timeout and a
// 15-second read/write timeout. A browser-like User-Agent header is always
// sent with the request.
//
// On success (HTTP 200) the caller owns the response and must Close() its
// Body or risk leaking connections. For any other status the body is closed
// here before returning; the response is still returned alongside the error
// so that callers can inspect its status and headers, but its body must not
// be read. If the request itself fails, the values from client.Do are
// returned unchanged.
func GetFeed(url string, client *http.Client) (*http.Response, error) {
	logrus.Infof("Crawling %v", url)

	// Timeouts used only when the caller did not supply a client.
	connectTimeout := (5 * time.Second)
	readWriteTimeout := (15 * time.Second)

	if client == nil {
		client = httpclient.NewTimeoutClient(connectTimeout, readWriteTimeout)
	}

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		logrus.Errorf("Error creating request: %v", err)
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")

	r, err := client.Do(req)
	if err != nil {
		logrus.Infof("Error getting %s: %s", url, err)
		return r, err
	}
	if r.StatusCode != http.StatusOK {
		// Callers typically bail out on a non-nil error without touching the
		// response, so release the connection here instead of relying on them
		// to close a body they were never going to read.
		r.Body.Close()
		err = fmt.Errorf("feed %s returned a non 200 status code: %s", url, r.Status)
		logrus.Info(err)
		return r, err
	}
	return r, nil
}
Beispiel #2
0
// getURL fetches url with a GET request and returns the complete response
// body.
//
// A new client is built for every call with httpclient.NewTimeoutClient using
// a 5-second connect timeout and a 15-second read/write timeout. Any status
// other than 200 is logged and reported as an error. The response body is
// always closed before the function returns.
func getURL(url string) ([]byte, error) {
	// Largest advertised Content-Length for which the result buffer is
	// allocated up front. The header is controlled by the remote server, so
	// an unchecked value would let it force an arbitrarily large allocation
	// (or a makeslice panic) before a single body byte has been received.
	const maxPrealloc = 16 << 20 // 16 MiB

	connectTimeout := (5 * time.Second)
	readWriteTimeout := (15 * time.Second)

	client := httpclient.NewTimeoutClient(connectTimeout, readWriteTimeout)

	resp, err := client.Get(url)
	if err != nil {
		logrus.Infof("Error getting %s: %s", url, err)
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		err = fmt.Errorf("feed %s returned a non 200 status code: %s", url, resp.Status)
		logrus.Error(err)
		return nil, err
	}

	var b []byte
	if n := resp.ContentLength; n > 0 && n <= maxPrealloc {
		// Known, reasonable size: read straight into an exactly sized buffer.
		b = make([]byte, n)
		_, err = io.ReadFull(resp.Body, b)
	} else {
		// Unknown or very large size: let the buffer grow only as data
		// actually arrives.
		b, err = ioutil.ReadAll(resp.Body)
	}
	if err != nil {
		return nil, fmt.Errorf("error reading response for %s: %s", url, err)
	}
	return b, nil
}