func MakeRequest(url string) *browser.Browser { // Open a new browser needs to be a browser so run js bow := surf.NewBrowser() err := bow.Open(url) if err != nil { panic(err) } return bow }
func scrape(page string, p patron) (items []an_item) { bow := surf.NewBrowser() historyJar := jar.NewMemoryHistory() bow.SetHistoryJar(historyJar) // grab main page and click to login page err := bow.Open(page) if err != nil { panic(err) } fmt.Println(bow.Title()) err = bow.Click("a:contains('My Account')") if err != nil { panic(err) } fmt.Println(bow.Title()) // populate login form and submit form, err := bow.Form("form[action='/Mobile/MyAccount/Logon']") if err != nil { panic(err) } fmt.Println("logging in", p.Name) form.Input("barcodeOrUsername", p.Card) form.Input("password", p.Pin) fmt.Println(form) err = form.Submit() if err != nil { panic(err) } items_out := bow.Find("a[href='/Mobile/MyAccount/ItemsOut']") num_items_out, err := strconv.Atoi(strings.Split(items_out.Text(), " ")[0]) if err != nil { panic(err) } if num_items_out > 0 { items = getItemsOut(bow) return items } return nil }
// сохраняет в файл csv результат запроса в файл с именем namef func savehttptocsv(namef string, suri string, suri2 string) int { // Create a new browser and open reddit. bow := surf.NewBrowser() err := bow.Open(suri) if err != nil { panic(err) } err = bow.Open(suri2) if err != nil { panic(err) } rescsv := bow.Body() savestrtofile("report.csv", rescsv) return 0 }
func scrape() (SolarOSReading, error) { bow := surf.NewBrowser() var reading SolarOSReading url, err := getScript() if err != nil { return SolarOSReading{}, err } err = bow.Open(url) if err != nil || bow.StatusCode() != 200 { return SolarOSReading{}, err } body := bow.Body() lifemeter := meter(body) if lifemeter != "" { reading.LifeMeter = lifemeter } reading.MoneySaved = dollarsSaved(body) reading.InstantaneousPower = instantMeter(body) reading.TreesSaved = treesSaved(body) reading.OilOffset = oilOffset(body) reading.CO2Offset = co2Offset(body) return reading, nil }
func getScript() (string, error) { // logs into SolarOS and scrapes the page, returning a usable URL bow := surf.NewBrowser() err := bow.Open("https://solaros.datareadings.com/") if err != nil || bow.StatusCode() != 200 { return "", err } fm := bow.Forms()[1] username := fm.Dom().Find("input").Nodes[0].Attr[3].Val password := fm.Dom().Find("input").Nodes[1].Attr[3].Val fm.Input(username, config.Username) fm.Input(password, config.Password) err = fm.Submit() if err != nil || bow.StatusCode() != 200 { return "", nil } cookie := fmt.Sprintf("%v", bow.SiteCookies()[0]) cookie = strings.SplitN(cookie, "=", 2)[1] body := bow.Body() page := liftPage(body) watch := toWatch(body) url := dataURL(cookie, page, watch) return url, nil }
func main() { flag.Parse() spew.Config.Indent = " " fmt.Printf("Version: %v\nUrl: %v\n", Version, Url) mech := surf.NewBrowser() err := mech.Open(Url) check(err) body := mech.Body() //doc, err := html.Parse(strings.NewReader(s)) //doc, err := html.Parse(strings.NewReader(body)) doc, err := gokogiri.ParseHtml([]byte(body)) //doc, err := gokogiri.ParseHtml([]byte(body)) check(err) div := doc.NodeById("middleContainer") //fmt.Printf("TypeOf div: %v\n", reflect.TypeOf(div)) fmt.Printf("TypeOf div: %T\n", div) //spew.Printf("Div: %+v\n", div) //os.Exit(0) //content := string(div.Content()) //fmt.Printf("Div: %+v\n", div.Content()) //fmt.Println(content) //fmt.Println(div.String()) if err := os.MkdirAll(*dir, os.ModePerm); err != nil { log.Fatal(err) } /* b := bytes.NewBufferString(string(doc.Data)) //b := bytes.NewBufferString(string(body)) //b := bytes.NewBufferString(string(div.String())) z := html.NewTokenizer(b) depth := 0 for { tt := z.Next() fmt.Println("======================================================================") spew.Printf("z: %+v\n", z) fmt.Println("======================================================================") switch { case tt == html.ErrorToken: // End of the document, we're done return // /* // case tt == html.StartTagToken: // t := z.Token() // // isAnchor := t.Data == "a" // if isAnchor { // fmt.Println("We found a link!") // spew.Printf("Data: %+v\n", t) // } // case tt == html.TextToken: if depth > 0 { // emitBytes should copy the []byte it receives, // if it doesn't process it immediately. fmt.Println("*********") fmt.Println(z.Token()) fmt.Println("*********") } case tt == html.StartTagToken, tt == html.EndTagToken: tn, _ := z.TagName() if len(tn) == 1 && tn[0] == 'a' { if tt == html.StartTagToken { depth++ } else { depth-- } } } } */ //Rows of interest have the form: // <a href="/1577/" title="2015-9-14">Advent</a><br> }
// NewBrowser is generator for Browser func NewBrowser(email, password string) *Browser { bw := surf.NewBrowser() bw.SetUserAgent(userAgent) return &Browser{Browser: bw, Email: email, Password: password} }