// send uses the given *http.Request to make an HTTP request. func (bow *Browser) httpRequest(req *http.Request) error { bow.preSend() resp, err := bow.buildClient().Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode == 503 && resp.Header.Get("Server") == "cloudflare-nginx" { if !bow.solveCF(resp, req.URL) { return fmt.Errorf("Page protected with cloudflare with unknown algorythm") } else { return nil } } content_type := resp.Header.Get("Content-Type") if resp.StatusCode != 403 { if content_type == "text/html; charset=GBK" { enc := mahonia.NewDecoder("gbk") e := enc.NewReader(resp.Body) bow.body, err = ioutil.ReadAll(e) if err != nil { return err } } else { fixedBody, err := charset.NewReader(resp.Body, content_type) if err == nil { bow.body, err = ioutil.ReadAll(fixedBody) if err != nil { return err } } else { bow.body, err = ioutil.ReadAll(resp.Body) if err != nil { return err } } } bow.contentConversion(content_type) } else { bow.body = []byte(`<html></html>`) } buff := bytes.NewBuffer(bow.body) dom, err := goquery.NewDocumentFromReader(buff) if err != nil { return err } bow.history.Push(bow.state) bow.state = jar.NewHistoryState(req, resp, dom) bow.postSend() return nil }
// Solve CloudFlare func (bow *Browser) solveCF(resp *http.Response, rurl *url.URL) bool { if strings.Contains(rurl.String(), "chk_jschl") { // We are in deadloop return false } time.Sleep(time.Duration(5) * time.Second) body, err := ioutil.ReadAll(resp.Body) if err != nil { return false } buff := bytes.NewBuffer(body) dom, err := goquery.NewDocumentFromReader(buff) host := rurl.Host js := dom.Find("script:contains(\"s,t,o,p,b,r,e,a,k,i,n,g\")").Text() re1 := regexp.MustCompile("setTimeout\\(function\\(\\){\\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\\r?\\n[\\s\\S]+?a\\.value =.+?)\\r?\\n") re2 := regexp.MustCompile("a\\.value = (parseInt\\(.+?\\)).+") re3 := regexp.MustCompile("\\s{3,}[a-z](?: = |\\.).+") re4 := regexp.MustCompile("[\\n\\\\']") js = re1.FindAllStringSubmatch(js, -1)[0][1] js = re2.ReplaceAllString(js, re2.FindAllStringSubmatch(js, -1)[0][1]) js = re3.ReplaceAllString(js, "") js = re4.ReplaceAllString(js, "") js = strings.Replace(js, "return", "", -1) jsEngine := otto.New() data, err := jsEngine.Eval(js) if err != nil { return false } checksum, err := data.ToInteger() if err != nil { return false } checksum += int64(len(host)) if err != nil { return false } jschlVc, _ := dom.Find("input[name=\"jschl_vc\"]").Attr("value") pass, _ := dom.Find("input[name=\"pass\"]").Attr("value") jschlAnswer := strconv.Itoa(int(checksum)) u := rurl.Scheme + "://" + rurl.Host + "/cdn-cgi/l/chk_jschl" ur, err := url.Parse(u) q := ur.Query() q.Add("jschl_vc", jschlVc) q.Add("pass", pass) q.Add("jschl_answer", jschlAnswer) ur.RawQuery = q.Encode() bow.DelRequestHeader("Cookie") bow.DelRequestHeader("Referer") bow.AddRequestHeader("Referer", rurl.String()) cjar := bow.GetCookieJar() cook := cjar.Cookies(rurl) if cook != nil { for _, co := range cook { bow.AddRequestHeader("Cookie", co.Name+"="+co.Value) } } bow.Open(ur.String()) if bow.refresh != nil { bow.refresh.Stop() } return true }