Beispiel #1
0
// httpRequest executes req with the client returned by bow.buildClient and
// stores the outcome on the browser.
//
// The flow is:
//   - bow.preSend is called before the request is issued.
//   - A 503 response whose Server header is "cloudflare-nginx" is handed to
//     bow.solveCF; the method then returns nil on success or an error when
//     the challenge could not be solved, without touching history or state.
//   - A 403 response gets a placeholder empty HTML document as its body.
//   - Any other response has its body read (GBK pages through mahonia, the
//     rest through charset.NewReader with a raw-read fallback) and passed
//     through bow.contentConversion.
//   - The body is parsed with goquery, the previous state is pushed onto the
//     history, the new state is installed and bow.postSend is called.
//
// It returns the first error encountered while sending, reading or parsing.
func (bow *Browser) httpRequest(req *http.Request) error {
	bow.preSend()
	resp, err := bow.buildClient().Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Cloudflare challenge page: delegate entirely to the solver.
	if resp.StatusCode == 503 && resp.Header.Get("Server") == "cloudflare-nginx" {
		if bow.solveCF(resp, req.URL) {
			return nil
		}
		return fmt.Errorf("Page protected with cloudflare with unknown algorythm")
	}

	contentType := resp.Header.Get("Content-Type")

	if resp.StatusCode == 403 {
		// Forbidden responses are not read; an empty document stands in.
		bow.body = []byte(`<html></html>`)
	} else {
		switch {
		case contentType == "text/html; charset=GBK":
			gbk := mahonia.NewDecoder("gbk")
			bow.body, err = ioutil.ReadAll(gbk.NewReader(resp.Body))
		default:
			decoded, decErr := charset.NewReader(resp.Body, contentType)
			if decErr != nil {
				// No charset-aware reader available: fall back to the raw body.
				bow.body, err = ioutil.ReadAll(resp.Body)
			} else {
				bow.body, err = ioutil.ReadAll(decoded)
			}
		}
		if err != nil {
			return err
		}
		bow.contentConversion(contentType)
	}

	dom, err := goquery.NewDocumentFromReader(bytes.NewBuffer(bow.body))
	if err != nil {
		return err
	}

	bow.history.Push(bow.state)
	bow.state = jar.NewHistoryState(req, resp, dom)
	bow.postSend()

	return nil
}
Beispiel #2
0
// Patterns used by solveCF to pull the challenge arithmetic out of the
// Cloudflare interstitial script. They are compiled once at package scope
// instead of on every call.
var (
	// cfChallengeBodyRe captures the body of the setTimeout callback, from
	// the variable declaration down to the first "a.value =" line.
	cfChallengeBodyRe = regexp.MustCompile("setTimeout\\(function\\(\\){\\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\\r?\\n[\\s\\S]+?a\\.value =.+?)\\r?\\n")
	// cfAnswerExprRe captures the parseInt(...) expression assigned to a.value.
	cfAnswerExprRe = regexp.MustCompile("a\\.value = (parseInt\\(.+?\\)).+")
	// cfIndentedStmtRe matches indented statements that assign to, or access
	// a member of, a single-letter variable; they are stripped before evaluation.
	cfIndentedStmtRe = regexp.MustCompile("\\s{3,}[a-z](?: = |\\.).+")
	// cfStripCharsRe matches newlines, backslashes and single quotes.
	cfStripCharsRe = regexp.MustCompile("[\\n\\\\']")
)

// solveCF tries to answer the Cloudflare JavaScript challenge contained in
// resp, which was served for rurl.
//
// It waits five seconds, extracts the challenge script from the page,
// evaluates it with otto, adds the length of rurl.Host to the result and
// opens <scheme>://<host>/cdn-cgi/l/chk_jschl with the jschl_vc, pass and
// jschl_answer query parameters. The Cookie and Referer request headers are
// reset for that request: Referer is set to rurl and the cookies stored in
// the browser's jar for rurl are re-added.
//
// It returns false when rurl is already the challenge endpoint (to avoid an
// endless loop), when the body cannot be read or parsed, when the page does
// not have the expected challenge layout, or when the script cannot be
// evaluated to an integer. It returns true once the answer has been
// submitted.
func (bow *Browser) solveCF(resp *http.Response, rurl *url.URL) bool {
	if strings.Contains(rurl.String(), "chk_jschl") {
		// The challenge endpoint itself was challenged: answering again
		// would loop forever.
		return false
	}
	// NOTE(review): presumably the delay exists because the challenge
	// rejects answers submitted too quickly; confirm.
	time.Sleep(5 * time.Second)

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return false
	}
	dom, err := goquery.NewDocumentFromReader(bytes.NewBuffer(body))
	if err != nil {
		return false
	}

	js := dom.Find("script:contains(\"s,t,o,p,b,r,e,a,k,i,n,g\")").Text()

	// An unexpected page layout must be reported as "not solved", not crash
	// with an index-out-of-range panic.
	challenge := cfChallengeBodyRe.FindStringSubmatch(js)
	if challenge == nil {
		return false
	}
	js = challenge[1]

	answerExpr := cfAnswerExprRe.FindStringSubmatch(js)
	if answerExpr == nil {
		return false
	}
	// Literal replacement: the captured expression is JavaScript, not a
	// regexp template, so any "$" in it must not be expanded.
	js = cfAnswerExprRe.ReplaceAllLiteralString(js, answerExpr[1])
	js = cfIndentedStmtRe.ReplaceAllString(js, "")
	js = cfStripCharsRe.ReplaceAllString(js, "")
	js = strings.Replace(js, "return", "", -1)

	jsEngine := otto.New()
	data, err := jsEngine.Eval(js)
	if err != nil {
		return false
	}
	checksum, err := data.ToInteger()
	if err != nil {
		return false
	}
	checksum += int64(len(rurl.Host))

	// Without both hidden form fields the answer cannot be submitted.
	jschlVc, ok := dom.Find("input[name=\"jschl_vc\"]").Attr("value")
	if !ok {
		return false
	}
	pass, ok := dom.Find("input[name=\"pass\"]").Attr("value")
	if !ok {
		return false
	}
	// FormatInt avoids truncating the int64 checksum on 32-bit platforms.
	jschlAnswer := strconv.FormatInt(checksum, 10)

	ur, err := url.Parse(rurl.Scheme + "://" + rurl.Host + "/cdn-cgi/l/chk_jschl")
	if err != nil {
		return false
	}
	q := ur.Query()
	q.Add("jschl_vc", jschlVc)
	q.Add("pass", pass)
	q.Add("jschl_answer", jschlAnswer)
	ur.RawQuery = q.Encode()

	bow.DelRequestHeader("Cookie")
	bow.DelRequestHeader("Referer")
	bow.AddRequestHeader("Referer", rurl.String())

	// Ranging over a nil slice is a no-op, so no nil check is needed.
	for _, co := range bow.GetCookieJar().Cookies(rurl) {
		bow.AddRequestHeader("Cookie", co.Name+"="+co.Value)
	}

	// NOTE(review): whatever bow.Open returns is not inspected, so a failed
	// submission is still reported as solved; confirm whether that is intended.
	bow.Open(ur.String())

	if bow.refresh != nil {
		bow.refresh.Stop()
	}
	return true
}