// GetContentishe picks a random page in the category, then a random
// image from that page, and returns the image URL.
func GetContentishe(category_url string) (url string, err error) {
	doc, err := goquery.NewDocument(category_url)
	if err != nil {
		return "", err
	}
	pNav := doc.Find("div.pagination_expanded > span.current").First()
	pageCount, err := strconv.Atoi(pNav.Text())
	// Guard against a missing or single-page pagination block:
	// rand.Intn panics on a non-positive argument.
	if err != nil || pageCount < 2 {
		return "", errors.New("failed to determine page count")
	}
	rand.Seed(time.Now().Unix())
	pageIndex := strconv.Itoa(rand.Intn(pageCount-1) + 1)
	doc, err = goquery.NewDocument(category_url + "/" + pageIndex)
	if err != nil {
		return "", err
	}
	contentishe := doc.Find(".image [src$='.gif']," +
		" .image [src$='.png']," +
		" .image [src$='.jpg']," +
		" .image [src$='.jpeg'] ")
	if contentishe.Length() == 0 {
		return "", errors.New("failed to find contentishe")
	}
	imgSrc, exist := contentishe.Eq(rand.Intn(contentishe.Length())).Attr("src")
	if !exist || imgSrc == "" {
		imgSrc, exist = contentishe.Eq(rand.Intn(contentishe.Length())).Attr("href")
		if !exist || imgSrc == "" {
			return "", errors.New("bad src and href")
		}
	}
	return imgSrc + "?.jpg", nil
}
// posts walks every page of the thread (newest comments first) and
// collects the posts that are newer than lastModified.
func posts(url string, lastModified time.Time) []Post {
	doc, err := goquery.NewDocument(url)
	errNotNilToPanic(err)
	// Read the last page number out of the pagination block's final link.
	lastPage := 0
	doc.Find("ul").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		if class, exist := s.Attr("class"); exist && class == "pagination" {
			if href, exist := s.Find("li").Find("a").Last().Attr("href"); exist {
				reg := regexp.MustCompile(".*page_num=([0-9]+)$")
				if m := reg.FindStringSubmatch(href); len(m) > 1 {
					lastPage, _ = strconv.Atoi(m[1])
				}
			}
			return false
		}
		return true
	})
	pList := []Post{}
	for page := 1; page <= lastPage; page++ {
		doc, err := goquery.NewDocument(url + "?comment_order=DESC&page_num=" + strconv.Itoa(page))
		errNotNilToPanic(err)
		doc.Find("div").EachWithBreak(func(_ int, s *goquery.Selection) bool {
			if class, exist := s.Attr("class"); exist && class == "post-sla" {
				p := post(s)
				// Posts are ordered newest first, so stop at the first old one.
				if !lastModified.Before(p.postDate) {
					return false
				}
				pList = append(pList, p)
			}
			return true
		})
	}
	return pList
}
func BroadcastIds(showId string, all bool) []string {
	continueUntilPage := 1
	showUrl := "https://www.bbc.co.uk/programmes/" + showId + "/episodes/guide?page="
	var broadcastIds []string
	showDoc, err := goquery.NewDocument(showUrl + strconv.Itoa(continueUntilPage))
	if err != nil {
		log.Fatal(err)
	}
	if all {
		maxPage, _ := strconv.Atoi(showDoc.Find(".pagination__page--last a").Text())
		if maxPage > 0 {
			continueUntilPage = maxPage
		}
	}
	for pageCount := 1; pageCount <= continueUntilPage; pageCount++ {
		if pageCount > 1 {
			showDoc, err = goquery.NewDocument(showUrl + strconv.Itoa(pageCount))
			if err != nil {
				log.Fatal(err)
			}
		}
		broadcastIds = append(broadcastIds, showDoc.Find(".programme__titles a").Map(func(i int, s *goquery.Selection) string {
			broadcastLink, _ := s.Attr("href")
			return strings.Split(broadcastLink, "/")[2]
		})...)
	}
	return broadcastIds
}
func parsePttBoardIndex(page int) (hrefs []string) {
	doc, err := goquery.NewDocument(EntryAddress)
	if err != nil {
		log.Fatal(err)
	}
	hrefs = make([]string, 0)
	maxPageNumberString := ""
	var PageWebSide string
	if page > 0 {
		// Find the "previous page" (上頁) link to recover the newest index number.
		doc.Find(".btn-group a").Each(func(i int, s *goquery.Selection) {
			if strings.Contains(s.Text(), "上頁") {
				href, exist := s.Attr("href")
				if exist {
					targetString := strings.Split(href, "index")[1]
					targetString = strings.Split(targetString, ".html")[0]
					fmt.Println("total page:", targetString)
					maxPageNumberString = targetString
				}
			}
		})
		pageNum, _ := strconv.Atoi(maxPageNumberString)
		pageNum = pageNum - page
		PageWebSide = fmt.Sprintf("https://www.ptt.cc/bbs/Beauty/index%d.html", pageNum)
	} else {
		PageWebSide = EntryAddress
	}
	doc, err = goquery.NewDocument(PageWebSide)
	if err != nil {
		log.Fatal(err)
	}
	doc.Find(".r-ent").Each(func(i int, s *goquery.Selection) {
		title := strings.TrimSpace(s.Find(".title").Text())
		likeCount, _ := strconv.Atoi(s.Find(".nrec span").Text())
		href, _ := s.Find(".title a").Attr("href")
		link := BasePttAddress + href
		hrefs = append(hrefs, link)
		fmt.Printf("%d:[%d★]%s\n", i, likeCount, title)
	})
	// Print pages
	fmt.Printf("Pages: ")
	for i := page - 3; i <= page+2; i++ {
		if i >= 0 {
			if i == page {
				fmt.Printf("[%v] ", i)
			} else {
				fmt.Printf("%v ", i)
			}
		}
	}
	fmt.Printf("(o: open file in finder, s: top page, n: next, p: prev, quit: quit program)\n")
	return hrefs
}
// ParseCK101PageByIndex sets the CK101 board page index, fetches all
// posts on that page, and returns the article count.
func (p *CK101) ParseCK101PageByIndex(page int) int {
	doc, err := goquery.NewDocument(p.entryAddress)
	if err != nil {
		log.Fatal(err)
	}
	urlList := make([]string, 0)
	postList := make([]string, 0)
	starList := make([]int, 0)
	var PageWebSide string
	page = page + 1 // one-based
	if page > 1 {
		// Find page result
		PageWebSide = fmt.Sprintf("http://ck101.com/forum-1345-%d.html", page)
	} else {
		PageWebSide = p.entryAddress
	}
	//fmt.Println("Page", PageWebSide)
	doc, err = goquery.NewDocument(PageWebSide)
	if err != nil {
		log.Fatal(err)
	}
	doc.Find(".cl_box").Each(func(i int, s *goquery.Selection) {
		star := ""
		title := ""
		url := ""
		starInt := 0
		s.Find("a").Each(func(i int, tQ *goquery.Selection) {
			title, _ = tQ.Attr("title")
			url, _ = tQ.Attr("href")
		})
		s.Find("em").Each(func(i int, starC *goquery.Selection) {
			star_c, _ := starC.Attr("title")
			fmt.Println("star_c:", star_c)
			// The title attribute reads "查看N" ("N views");
			// strip the "查看" prefix to get the number.
			if strings.Contains(star_c, "查看") {
				star = strings.Replace(star_c, "查看", "", -1)
				fmt.Println("star:", star)
				star = strings.TrimSpace(star)
				starInt, _ = strconv.Atoi(star)
			}
		})
		urlList = append(urlList, url)
		starList = append(starList, starInt)
		postList = append(postList, title)
	})
	p.storedPostURLList = urlList
	p.storedStarList = starList
	p.storedPostTitleList = postList
	return len(p.storedPostTitleList)
}
// getPagina fetches the page, retrying up to 1000 times on error
// before giving up and aborting the program.
func getPagina(url string) (doc *goquery.Document) {
	var err error
	doc, err = goquery.NewDocument(url)
	for i, maxIntentos := 0, 1000; err != nil && i < maxIntentos; i++ {
		doc, err = goquery.NewDocument(url)
	}
	if err != nil {
		mataPrograma(">>Error al obtener la url: "+url, err)
	}
	return
}
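// The retry loop in getPagina fires up to 1000 back-to-back requests with
// no pause between attempts. A gentler variant sleeps between retries; the
// sketch below is illustrative rather than part of the original program,
// and the one-second delay and ten-attempt cap are assumptions.
func getPaginaConEspera(url string) (*goquery.Document, error) {
	var doc *goquery.Document
	var err error
	for i := 0; i < 10; i++ {
		if doc, err = goquery.NewDocument(url); err == nil {
			return doc, nil
		}
		time.Sleep(1 * time.Second) // back off before the next attempt
	}
	return nil, err
}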
// lsyj scrapes the consular security alert (领事安全预警) listings and
// saves every entry into an xlsx workbook.
func lsyj() {
	file := xlsx.NewFile()
	sheet := file.AddSheet("领事安全预警")
	doc, err := goquery.NewDocument(URL_LSYJ + "/default.shtml")
	if err != nil {
		log.Fatal(err)
	}
	totalUrl := 0
	doc.Find(".ct3_m .news_list li a").Each(func(i int, contentSelection *goquery.Selection) {
		name := contentSelection.Text()
		if href, exists := contentSelection.Attr("href"); exists {
			href = URL_LSYJ + strings.Replace(href, ".", "", 1)
			err := parseLSYJ(sheet, href)
			if err != nil {
				fmt.Printf("[read error]第%d个:%s。url:%s。 %v", i+1, name, href, err)
			}
			totalUrl += 1
		}
	})
	// The remaining list pages live at /default_1.shtml through /default_11.shtml.
	for i := 1; i <= 11; i++ {
		url := fmt.Sprintf("/default_%d.shtml", i)
		fmt.Printf("第%d个URL:%s", i, url)
		doc, err := goquery.NewDocument(URL_LSYJ + url)
		if err != nil {
			log.Fatal(err)
		}
		doc.Find(".ct3_m .news_list li a").Each(func(i int, contentSelection *goquery.Selection) {
			name := contentSelection.Text()
			if href, exists := contentSelection.Attr("href"); exists {
				href = URL_LSYJ + strings.Replace(href, ".", "", 1)
				err := parseLSYJ(sheet, href)
				if err != nil {
					fmt.Printf("[read error]第%d个:%s。url:%s。 %v", i+1, name, href, err)
				}
				totalUrl += 1
			}
		})
	}
	fileName := fmt.Sprintf("领事安全预警(%d个).xlsx", totalUrl)
	err = file.Save(fileName)
	if err != nil {
		fmt.Println(err.Error())
	}
}
func Rong360bbsCrawler() {
	beego.Info("Process rong360 bbs-yangmao.")
	for i := 1; i < 6; i++ {
		u := "http://bbs.rong360.com/forum-76-" + strconv.Itoa(i) + ".html"
		beego.Info("Process rong360 bbs-yangmao url: " + u)
		document, _ := goquery.NewDocument(u)
		// All threads on the list page.
		document.Find("table#threadlisttableid").Find("tbody").Each(func(i int, selection *goquery.Selection) {
			topic := &models.Topic{}
			topic.Node_id = 4
			topic.Uid = 1
			topic.Ord = time.Now().Unix()
			t := selection.Find("th").First().Find("a.s.xst")
			title := t.Text()
			if len(title) > 0 {
				topic.Title = title
				if titleUrl, f := t.Attr("href"); f {
					// Fetch the thread body.
					c, _ := goquery.NewDocument(titleUrl)
					content := c.Find("div#postlist").First().Find("td.t_f").First()
					content.Find("img").Each(func(i int, se *goquery.Selection) {
						// Rewrite each image's src to an absolute URL.
						if src, exists := se.Attr("file"); exists {
							se.SetAttr("src", "http://bbs.rong360.com/"+src)
						}
					})
					html, _ := content.Html()
					topic.Content = html
					topic.Addtime = time.Now().Unix()
					topic.Updatetime = time.Now().Unix()
					(&models.TopicDao{}).InsertOrUpdate(topic)
				}
			}
		})
	}
}
// GetMovie finds shows with a title containing the keyword.
// Returns an error if no show is found.
func GetMovie(keyword string) (*Movie, error) {
	if keyword == "" {
		return nil, ErrMissingArgument
	}
	doc, err := goquery.NewDocument("https://kat.cr/usearch/" + keyword)
	if err != nil {
		return nil, err
	}
	usearch := doc.Find(".torrentMediaInfo")
	if usearch.Length() < 1 {
		return nil, ErrMovieNotFound
	}
	titleLink := doc.Find("h1 > a.plain")
	title := titleLink.Text()
	if title == "" {
		return nil, ErrParsingFailure
	}
	url, ok := titleLink.Attr("href")
	if !ok {
		return nil, ErrParsingFailure
	}
	doc, err = goquery.NewDocument("https://kat.cr" + url)
	if err != nil {
		return nil, err
	}
	cover, ok := doc.Find(".movieCover > img").Attr("src")
	if !ok {
		return nil, ErrParsingFailure
	}
	magnets := make(map[string]string, 3)
	magnets["1080p"], _ = doc.Find("#tab-1080p i.ka-magnet").Parent().Attr("href")
	magnets["720p"], _ = doc.Find("#tab-720p i.ka-magnet").Parent().Attr("href")
	magnets["hdtv"], _ = doc.Find("#tab-HDRiP i.ka-magnet").Parent().Attr("href")
	return &Movie{
		Title:   title,
		URL:     url,
		Cover:   cover,
		Sources: magnets}, nil
}
func SelfPage(cururl string) {
	x, _ := goquery.NewDocument(cururl)
	// Grab the page title.
	title := x.Find(".main-tags").Text()
	fmt.Println("标题:", title)
	// Collect the images visible on the current page.
	x.Find(".size-full").Each(func(idx int, s *goquery.Selection) {
		title2, b2 := s.Attr("title")
		if b2 {
			title = title2
		}
		v, b := s.Attr("src")
		if b {
			if !strings.HasSuffix(v, "grey.jpg") {
				AddSpiderData(v, title)
			}
		}
	})
	// Follow the pagination links.
	x.Find(".link_pages").Each(func(idx int, s *goquery.Selection) {
		iurl, bl := s.Find("a").Attr("href")
		if bl {
			z, _ := goquery.NewDocument(iurl)
			// Collect the images visible on the opened page.
			z.Find(".size-full").Each(func(idx int, s *goquery.Selection) {
				title2, b2 := s.Attr("title")
				if b2 {
					title = title2
				}
				v, b := s.Attr("src")
				if b {
					if !strings.HasSuffix(v, "grey.jpg") {
						AddSpiderData(v, title)
					}
				}
			})
		}
	})
}
func main() {
	urlMain := `http://www.kuaiyilicai.com`
	urlUpayCurrency := `http://www.kuaiyilicai.com/upcurrency.html`
	docUpayCurrency, err := goquery.NewDocument(urlUpayCurrency)
	checkError(err)
	// fmt.Println(` ** list all sorts of currency`)
	docUpayCurrency.Find(`ul.list-inline > li.itm`).Each(
		func(i int, selUpayCcurrency *goquery.Selection) {
			// fmt.Println(` ** get all url of every currency`)
			selUpayCcurrency.Find(`a`).Each(
				func(i int, sel_sort *goquery.Selection) {
					href, _ := sel_sort.Attr(`href`)
					if matched, _ := regexp.MatchString(`.*uprate.*`, href); matched {
						href = urlMain + href
						fmt.Println(href + ` | ` + selUpayCcurrency.Text())
						// fmt.Println(` ** get data from every url of currency`)
						docEachCurrency, err := goquery.NewDocument(href)
						checkError(err)
						docEachCurrency.Find(`div.rate`).Each(
							func(i int, selEachCurrency *goquery.Selection) {
								eachCurrency := regexp.MustCompile(`\s`).
									ReplaceAllString(selEachCurrency.Text(), ``)
								if matched, _ := regexp.MatchString(`\d+\.\d+\/\d+\.\d+.*`, eachCurrency); matched {
									// fmt.Println(` ** match a format`)
									eachCurrency = regexp.MustCompile(`\d+\.\d+\/(\d+\.\d+)[^0-9]*(\d+)-(\d+).*`).
										ReplaceAllString(eachCurrency, `$2$3;$1`)
								} else {
									// fmt.Println(` ** not match the format`)
									eachReciprocalCurrency := regexp.MustCompile(`[^0-9]*(\d+\.\d+)[^0-9]*(\d+-\d+).*`).
										ReplaceAllString(eachCurrency, `$1`)
									eachCurrencyDate := regexp.MustCompile(`[^0-9]*(\d+\.\d+)[^0-9]*(\d+)-(\d+).*`).
										ReplaceAllString(eachCurrency, `$2$3`)
									f, err := strconv.ParseFloat(eachReciprocalCurrency, 32)
									checkError(err)
									eachReciprocalCurrency = strconv.FormatFloat(1/f, 'f', 4, 32)
									eachCurrency = eachCurrencyDate + `;` + eachReciprocalCurrency
								}
								fmt.Println(
									regexp.MustCompile(`(.*);(.*)` /*Date-%4d;Currency-%.4f*/).
										ReplaceAllString(eachCurrency, `$2;$1`))
							})
					}
					// else { fmt.Println(` ** not match url`) }
				})
		})
}
func getBuildStatus(src string) (string, error) {
	doc, err := goquery.NewDocument(src)
	if err != nil {
		return "", err
	}
	cssPath := "#repo-info-tab > div.repository > table > tbody tr > td"
	var status string
	doc.Find(cssPath).Each(func(i int, s *goquery.Selection) {
		if status != "" {
			return // keep the first status found
		}
		switch s.Text() {
		case "Finished":
			status = "passing"
		case "Error":
			status = "failing"
		}
	})
	return status, nil
}
func GetSlideList(presentationURL string) ([]string, error) {
	//var slideContSelector string = ".slide_container"
	var slideImgSelector string = ".slide_image"
	var imgURLAttribute string = "data-full"
	doc, err := goquery.NewDocument(presentationURL)
	if err != nil {
		return nil, err
	}
	// Start with an empty slice, then extend it
	// for each slide that we find in the HTML page.
	slideList := make([]string, 0)
	// Find the slide images in the web page and, for each one,
	// retrieve the attribute that holds the full-size image URL.
	doc.Find(slideImgSelector).Each(func(i int, s *goquery.Selection) {
		if url, ok := s.Attr(imgURLAttribute); ok {
			slideList = append(slideList, url)
		}
	})
	if len(slideList) == 0 {
		return nil, TagNotFoundError{"No slide sections in the HTML page!"}
	}
	return slideList, nil
}
func (tpb *Thepiratebay) Search(query string, options Options) ([]*Result, error) {
	url := url.URL{
		Scheme: "https",
		Host:   "thepiratebay.org",
		Path:   fmt.Sprintf("/search/%s/0/7/0", query),
	}
	doc, err := goquery.NewDocument(url.String())
	if err != nil {
		return nil, err
	}
	ret := []*Result{}
	doc.Find("#SearchResults table#searchResult > tbody > tr").Each(func(i int, tr *goquery.Selection) {
		magnet, ok := tr.Find("a[href^=magnet]").Attr("href")
		if !ok {
			return
		}
		seeders, err := strconv.Atoi(tr.Find("td:nth-child(3)").Text())
		if err != nil || seeders < 0 {
			return
		}
		ret = append(ret, &Result{
			Name:      tr.Find(".detName a.detLink").Text(),
			MagnetURL: magnet,
			Seeders:   uint(seeders),
		})
	})
	return ret, nil
}
func parseStartLink() {
	fmt.Println("Input url: ")
	fmt.Scanf("%s", &url)
	firstDoc, err := goquery.NewDocument(url)
	checkerr(err)
	firstDoc.Find("tbody").Each(func(i int, tbody *goquery.Selection) {
		tbody.Find(".description").Each(func(j int, s *goquery.Selection) {
			link, _ := s.Find("a").Attr("href")
			x, _ := regexp.MatchString(`https://www.exploit-db.com/exploits/.....`, link)
			if x {
				file, err := os.OpenFile("temp.txt", os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
				checkerr(err)
				_, err = file.WriteString(link + "\n")
				checkerr(err)
				file.Close()
			}
			y, _ := regexp.MatchString(`/docs/......pdf`, link)
			if y {
				wasteUrl, err := os.OpenFile("waste.txt", os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666)
				checkerr(err)
				_, err = wasteUrl.WriteString(link)
				checkerr(err)
				wasteUrl.Close()
			}
		})
	})
}
func getDoc(url string) *goquery.Document {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err)
	}
	return doc
}
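// Recent goquery releases deprecate NewDocument in favor of building a
// document from an HTTP response via NewDocumentFromReader. The helper
// below is a hypothetical equivalent of getDoc, assuming goquery v1.5+
// and that the caller prefers an error return over log.Fatal.
func fetchDoc(url string) (*goquery.Document, error) {
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", res.Status)
	}
	return goquery.NewDocumentFromReader(res.Body)
}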
// Flush deletes the newest message in the mailbox, if any.
func (m *Mailbox) Flush() {
	id := func() string {
		doc, err := goquery.NewDocument(fmt.Sprintf(indexUrl, m.mail, 1))
		if err != nil {
			log.Fatal(err)
		}
		idUrl, _ := doc.Find("div.um a.lm").First().Attr("href")
		re := regexp.MustCompile("mail.php.b=.*?id=(.*)")
		matches := re.FindStringSubmatch(idUrl)
		if len(matches) == 2 {
			return matches[1]
		}
		return ""
	}()
	if id != "" {
		if resp, err := http.Get(fmt.Sprintf(deleteUrl, m.mail, strings.TrimLeft(id, "m"))); err == nil {
			resp.Body.Close() // don't leak the connection
		}
	}
}
func scrapeTrendingRepos(language string, outDir string) {
	var doc *goquery.Document
	var err error
	filename := dateFilename("github_trending_repos", ".json")
	if outDir != "" {
		filename = path.Join(outDir, filename)
	}
	err = createFile(filename)
	if err != nil {
		Error.Println(err)
		return
	}
	f, err := os.OpenFile(filename, os.O_APPEND|os.O_WRONLY, 0600)
	if err != nil {
		Error.Println(err)
		return
	}
	defer f.Close()
	for _, period := range Periods {
		if doc, err = goquery.NewDocument(fmt.Sprintf("https://github.com/trending?l=%s&since=%s", language, period)); err != nil {
			Error.Println(err)
			continue // don't parse a nil document
		}
		repos := readTrendingRepos(doc, period)
		err = writeRepos(f, repos)
		if err != nil {
			Error.Println(err)
			return
		}
	}
}
func crawler(target string, workerNum int) {
	doc, err := goquery.NewDocument(target)
	if err != nil {
		panic(err)
	}
	title := doc.Find("h1#thread_subject").Text()
	dir := fmt.Sprintf("%v/%v - %v", baseDir, threadId.FindStringSubmatch(target)[1], title)
	os.MkdirAll(dir, 0755)
	linkChan := make(chan string)
	wg := new(sync.WaitGroup)
	for i := 0; i < workerNum; i++ {
		wg.Add(1)
		go worker(dir, linkChan, wg)
	}
	doc.Find("div[itemprop=articleBody] img").Each(func(i int, img *goquery.Selection) {
		imgUrl, _ := img.Attr("file")
		linkChan <- imgUrl
	})
	close(linkChan)
	wg.Wait()
}
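// crawler fans image URLs out to workerNum goroutines over linkChan. The
// worker it launches is not shown in the source; the sketch below is a
// hypothetical implementation that downloads each URL into dir, assuming
// that behavior is what the real worker does.
func worker(dir string, linkChan <-chan string, wg *sync.WaitGroup) {
	defer wg.Done()
	for imgUrl := range linkChan {
		resp, err := http.Get(imgUrl)
		if err != nil {
			log.Println(err)
			continue // skip images that fail to download
		}
		// Save the image under its original file name.
		f, err := os.Create(path.Join(dir, path.Base(imgUrl)))
		if err == nil {
			io.Copy(f, resp.Body)
			f.Close()
		}
		resp.Body.Close()
	}
}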
func scrapeISO639() ([]iso639.Language, error) {
	// alpha-1
	doc, err := goquery.NewDocument("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes")
	if err != nil {
		return nil, err
	}
	all := []iso639.Language{}
	doc.Find("#mw-content-text > table.wikitable > tbody > tr").Each(func(i int, s *goquery.Selection) {
		var r iso639.Language
		r.Family = s.Find("td:nth-of-type(2) > a").Text()
		r.Name = s.Find("td:nth-of-type(3) > a").Text()
		r.NativeName = s.Find("td:nth-of-type(4)").Text()
		r.Code1 = s.Find("td:nth-of-type(5)").Text()
		r.Code2 = s.Find("td:nth-of-type(6)").Text()
		r.Code2B = s.Find("td:nth-of-type(7)").Text()
		r.Code3 = s.Find("td:nth-of-type(8)").Text()
		r.Code6 = s.Find("td:nth-of-type(9)").Text()
		all = append(all, r)
	})
	// TODO: scrape alpha3
	return all, nil
}
func Crawl(url string, urls chan []string) (links []string) {
	doc, err := goquery.NewDocument(url)
	errorify(err)
	doc.Find("title").Each(func(i int, s *goquery.Selection) {
		value := s.Text()
		fmt.Println(value)
	})
	links = make([]string, 0, 100)
	doc.Find("a").Each(func(i int, s *goquery.Selection) {
		link, exists := s.Attr("href")
		// only take wikipedia links to real pages
		if exists {
			switch {
			case strings.Contains(link, "edit") || strings.Contains(link, "disambiguation"):
				return
			case strings.HasPrefix(link, "//"):
				return
			case strings.HasPrefix(link, "#"):
				return
			case strings.HasPrefix(link, "/") && strings.Contains(link, "wiki"):
				links = append(links, "http://en.wikipedia.org"+link)
			default:
				return
			}
		}
	})
	urls <- links
	return
}
func main() {
	fmt.Println("starting...")
	flag.Parse()
	var doc *goquery.Document
	var err error
	if doc, err = goquery.NewDocument("http://science.nasa.gov/missions/?group=all"); err != nil {
		log.Fatal("Failed to fetch page")
	}
	doc.Find(".missions").Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
		m := unpackMission(s)
		if m.Phase == "Operating" {
			missions = append(missions, m)
		}
	})
	if *asJson {
		b, err := json.Marshal(missions)
		if err != nil {
			log.Fatal(err)
		}
		os.Stdout.Write(b)
	} else {
		for _, m := range missions {
			fmt.Println(m)
		}
	}
}
func (k *Kickass) Search(query string, options Options) ([]*Result, error) {
	url := url.URL{
		Scheme:   "https",
		Host:     "kat.cr",
		Path:     fmt.Sprintf("/usearch/%s/", query),
		RawQuery: "field=seeders&sorder=desc",
	}
	doc, err := goquery.NewDocument(url.String())
	if err != nil {
		return nil, err
	}
	ret := []*Result{}
	doc.Find("#mainSearchTable table.data tr[id]").Each(func(i int, s *goquery.Selection) {
		magnet, ok := s.Find("a[title='Torrent magnet link']").Attr("href")
		if !ok {
			return
		}
		seeders, err := strconv.Atoi(s.Find("td:nth-child(5)").Text())
		if err != nil || seeders < 0 {
			return
		}
		ret = append(ret, &Result{
			Name:      s.Find(".cellMainLink").Text(),
			MagnetURL: magnet,
			Seeders:   uint(seeders),
		})
	})
	return ret, nil
}
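// Thepiratebay and Kickass expose the same Search signature, so callers can
// treat them interchangeably behind an interface. The Searcher interface and
// the zero-value Options below are assumptions for illustration, not taken
// from the source package.
type Searcher interface {
	Search(query string, options Options) ([]*Result, error)
}

func searchAll(query string, engines ...Searcher) []*Result {
	var all []*Result
	for _, e := range engines {
		results, err := e.Search(query, Options{})
		if err != nil {
			continue // skip engines that fail
		}
		all = append(all, results...)
	}
	return all
}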
func scrapeMostStarredRepos(language string, outDir string) {
	var doc *goquery.Document
	var err error
	filename := dateFilename("github_most_starred", ".json")
	if outDir != "" {
		filename = path.Join(outDir, filename)
	}
	err = createFile(filename)
	if err != nil {
		Error.Println(err)
		return
	}
	f, err := os.OpenFile(filename, os.O_APPEND|os.O_WRONLY, 0600)
	if err != nil {
		Error.Println(err)
		return
	}
	defer f.Close()
	for i := 1; i <= 5; i++ {
		if doc, err = goquery.NewDocument(fmt.Sprintf("https://github.com/search?q=stars:>1&type=Repositories&l=%s&p=%d", language, i)); err != nil {
			Error.Println(err)
			continue // don't parse a nil document
		}
		repos := readMostStarredRepos(doc)
		err = writeRepos(f, repos)
		if err != nil {
			Error.Println(err)
			return
		}
	}
}
// getLocationInfoNUS gets a location (name, lat and lng) via NUS Web.
func getLocationInfoNUS(query string) ([]LocationInfo, error) {
	url := fmt.Sprintf("http://map.nus.edu.sg/index.php/search/by/%s", query)
	doc, err := goquery.NewDocument(url)
	if err != nil {
		return nil, err
	}
	var locations []LocationInfo
	s := doc.Find("#search_list a[href=\"javascript:void(0)\"]").First()
	onclick, _ := s.Attr("onclick")
	regex := regexp.MustCompile("long=([0-9\\.]+?)&lat=([0-9\\.]+?)'")
	matches := regex.FindAllStringSubmatch(onclick, -1)
	if len(matches) == 0 || len(matches[0]) != 3 {
		return nil, fmt.Errorf("can't find lat and lng from query: %s", query)
	}
	x, _ := strconv.ParseFloat(matches[0][1], 64)
	y, _ := strconv.ParseFloat(matches[0][2], 64)
	location := LocationInfo{
		Name: s.Text(),
		Lng:  x,
		Lat:  y,
	}
	locations = append(locations, location)
	return locations, nil
}
// prepScrapeCinemaMovies prepares the actual URL for movie showtimes at a particular cinema, then
// calls the actual scraping function.
func prepScrapeCinemaMovies(url string, context interface{}, cinemas chan<- []*data.Cinema, movies chan<- []*data.Movie) {
	var doc *gq.Document
	var err error
	log.Println("Retrieving document for " + url)
	if doc, err = gq.NewDocument(url); err != nil {
		log.Fatal(err)
	}
	allText, err := doc.Html()
	if err != nil {
		log.Fatal(err)
	}
	startIdx := strings.Index(allText, "buyTickets2")
	if startIdx > -1 {
		locIdx := strings.Index(allText[startIdx:], "loc=")
		endLoc := strings.Index(allText[startIdx+locIdx:], "&")
		loc := allText[startIdx+locIdx+4 : startIdx+locIdx+endLoc]
		go scrapeCinemaMovies(BASE+"/buyTickets2.jsp?loc="+loc+"&date="+time.Now().Format("02-01-2006"), context, cinemas, movies)
	} else {
		log.Fatalf("No available source URL")
	}
}
func main() {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		panic(err)
	}
	preElems := doc.Find("#mw-content-text > pre")
	if preElems.Length() != PreCount {
		panic("Did not find enough elements on the page.")
	}
	preElems.Each(func(i int, sel *goquery.Selection) {
		prees[PreType(i)] = sel.Text()
	})
	tableB := parseSBoxTable(prees[SBoxTable])
	bytesB := parseSBoxBytes(prees[SBoxBytes])
	if !bytes.Equal(tableB, bytesB) {
		panic("SBoxTable and SBoxBytes do not match.")
	}
	iTableB := parseSBoxTable(prees[SBoxInvTable])
	iBytesB := parseSBoxBytes(prees[SBoxInvBytes])
	if !bytes.Equal(iTableB, iBytesB) {
		panic("SBoxInvTable and SBoxInvBytes do not match.")
	}
	fmt.Println("Everything matches.")
}
func getAllData(link string, i int) {
	fmt.Println(link)
	doc, err := goquery.NewDocument(link)
	if err != nil {
		fmt.Println(err)
	}
	j := 0
	doc.Find(".s-result-item").Each(func(i int, s *goquery.Selection) {
		j++
		linkHref, exists := s.Find("a").Attr("href")
		if !exists {
			fmt.Println("no href")
		}
		//fmt.Println(linkHref)
		all := digitsRegexp.FindStringSubmatch(linkHref)
		if len(all) > 1 { // guard against hrefs with no digit group
			fmt.Println(all[1])
		}
	})
	chanM <- i
}
func scanFolder(url string) {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err)
	}
	doc.Find("a").Each(func(i int, s *goquery.Selection) {
		href, _ := s.Attr("href")
		if strings.HasSuffix(href, ".zip") {
			c := pool.Borrow()
			go func() {
				defer pool.Return(c)
				filename := download(url, href)
				if filename != "" {
					process(filename)
				}
			}()
		}
		if !strings.HasPrefix(href, "/") && strings.HasSuffix(href, "/") {
			log.Printf("%+v", href)
			scanFolder(url + href)
		}
	})
}
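// scanFolder bounds download concurrency through a borrowed-token pool whose
// implementation is not shown in the source. The sketch below is one
// hypothetical way to build such a pool with a buffered channel acting as a
// semaphore; the type and its zero-sized token are assumptions.
type connPool chan struct{}

func newConnPool(size int) connPool {
	p := make(connPool, size)
	for i := 0; i < size; i++ {
		p <- struct{}{} // pre-fill with one token per allowed worker
	}
	return p
}

func (p connPool) Borrow() struct{}  { return <-p } // blocks when the pool is empty
func (p connPool) Return(t struct{}) { p <- t }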
// ARSOPotresi returns a slice of Potres structs.
func ARSOPotresi() []Potres {
	var potresi []Potres
	var doc *goquery.Document
	var e error
	if res, found := cacheArso.Get("potresi"); found {
		return res.([]Potres)
	}
	if doc, e = goquery.NewDocument("http://www.arso.gov.si/potresi/obvestila%20o%20potresih/aip/"); e != nil {
		return potresi
	}
	doc.Find("#glavna td.vsebina table tr").Each(func(i int, s *goquery.Selection) {
		// ParseFloat's bitSize must be 32 or 64.
		magnituda, err := strconv.ParseFloat(s.Find("td:nth-child(4)").Text(), 64)
		if magnituda > 0 && err == nil {
			potres := Potres{}
			potres.Magnituda = magnituda
			potres.Lat, _ = strconv.ParseFloat(s.Find("td:nth-child(2)").Text(), 64)
			potres.Lon, _ = strconv.ParseFloat(s.Find("td:nth-child(3)").Text(), 64)
			potres.Lokacija = s.Find("td:nth-child(6)").Text()
			potres.Datum = s.Find("td:nth-child(1)").Text()
			potresi = append(potresi, potres)
		}
	})
	cacheArso.Set("potresi", potresi, cache.DefaultExpiration)
	return potresi
}