func TestNotRanWhenStopped(t *testing.T) { ran := false testSpider := spider.Get("http://google.com", func(ctx *spider.Context) error { ran = true return nil }) dur := 1*time.Second + 100*time.Millisecond stopCh := make(chan struct{}) sched := spider.NewScheduler() sched.Add(schedule.Every(1*time.Second), testSpider) sched.Start() go func() { sched.Stop() stopCh <- struct{}{} }() select { case <-time.After(dur): t.Error("Should not wait to much") case <-stopCh: if ran { t.Error("Spider ran but should not run when stopped") } } }
func loadSelfSpider() { SelfSpider = spider.Get("http://"+ExternalIP+"/", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("SelfSpider") if _, err := ctx.DoRequest(); err != nil { fmt.Println(err) return err } htmlparser, err := ctx.HTMLParser() if err != nil { fmt.Println(err) return err } html, _ := htmlparser.Html() p := SelfData{html, time.Now().String()} err = p.save() if err != nil { return fmt.Errorf("error saving SelfData") } return nil }) }
func init() { ScienceNewsSpider = spider.Get("http://www.sciencemag.org/news", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("ScienceNewsSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p ScienceNewsGroup p.Data = []ScienceNews{} htmlparser.Find(`div[class="media__body"]`).Each(func(i int, s *goquery.Selection) { title := strings.TrimSpace(s.Find("h2").Text()) url, _ := s.Find("h2 > a").Attr("href") url = "http://www.sciencemag.org" + url summary := "None" p.Data = append(p.Data, ScienceNews{title, summary, url}) }) // Open() err = p.save() // Close() if err != nil { return fmt.Errorf("error saving ScienceNewsGroup") } return nil }) }
func main() { wikiHTMLSpider := &WikipediaHTMLSpider{"Albert Einstein"} wikiJSONSpider := &WikipediaJSONSpider{"Lionel Messi"} spider.Add(schedule.Every(7*time.Second), wikiHTMLSpider) spider.Add(schedule.Every(9*time.Second), wikiJSONSpider) spider.Add(schedule.Every(5*time.Second), spider.Get("https://google.com", func(ctx *spider.Context) error { _, err := ctx.DoRequest() if err != nil { fmt.Println("Made request to google") } return nil })) spider.Start() <-time.After(26 * time.Second) }
func TestInMemory(t *testing.T) { ran := false testSpider := spider.Get("http://google.com", func(ctx *spider.Context) error { ran = true return nil }) sched := spider.NewScheduler() sched.Add(schedule.Every(1*time.Second), testSpider) sched.Start() dur := 1*time.Second + 500*time.Millisecond select { case <-time.After(dur): if !ran { t.Error("spider not ran") } } sched.Stop() }
func init() { InTheatersSpider = spider.Get("http://www.metacritic.com/browse/movies/release-date/theaters/date", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("InTheatersSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p InTheatersGroup p.Data = []Movie{} htmlparser.Find(`div[class="product_wrap group_product_wrap"]`).Each(func(i int, s *goquery.Selection) { title := strings.TrimSpace(s.Find("a").Text()) score, _ := strconv.Atoi(s.Find(`div[class*="metascore_w"]`).Text()) date := strings.TrimSpace(s.Find(`span[class="data"]`).Text()) _, month, _ := time.Now().Date() curMonth := month.String()[0:3] _, month, _ = time.Now().Add(time.Hour * -24 * 30).Date() lastMonth := month.String()[0:3] if (strings.Contains(date, curMonth) || strings.Contains(date, lastMonth)) && score > 60 { urlComponent := strings.Join(strings.Split("showtimes "+title, " "), "+") url := "https://www.google.com/?q=" + urlComponent + "#safe=active&q=" + urlComponent // fmt.Printf("%d: %s %d %s %s\n", i, title, score, date, url) p.Data = append(p.Data, Movie{title, date, score, url}) } }) // Open() err = p.save() // Close() if err != nil { return fmt.Errorf("error saving InTheatersGroup") } return nil }) }
func init() { ApNewsSpider = spider.Get("http://hosted.ap.org/dynamic/fronts/HOME?SITE=AP&SECTION=HOME", func(ctx *spider.Context) error { fmt.Println(time.Now()) // Execute the request if _, err := ctx.DoRequest(); err != nil { return err } // Get goquery's html parser htmlparser, err := ctx.HTMLParser() if err != nil { return err } htmlparser.Find(`p[class="ap-newsbriefitem-p"]`).Each(func(i int, s *goquery.Selection) { title := s.Find("a").Text() link, _ := s.Find("a").Attr("href") summary := s.Find(`span[class="topheadlinebody"]`).Text() fmt.Printf("%d: %s (%s) - %s\n", i, title, link, summary) }) return nil }) }
func init() { HackerNewsSpider = spider.Get("https://news.ycombinator.com/", func(ctx *spider.Context) error { fmt.Println(time.Now()) // Execute the request if _, err := ctx.DoRequest(); err != nil { return err } // Get goquery's html parser htmlparser, err := ctx.HTMLParser() if err != nil { return err } title := "" origin := "" url := "" score := 0 haveTitle := false htmlparser.Find(`tr`).Each(func(i int, s *goquery.Selection) { // fmt.Println(i) if math.Mod(float64(i), 2) == 0 { title = s.Find(`td[class="title"] > a`).Text() origin = s.Find(`span[class="sitebit comhead"]`).Text() url, _ = s.Find(`td[class="title"] > a`).Attr("href") haveTitle = true } else if math.Mod(float64(i), 2) == 1 && haveTitle == true { score, _ = strconv.Atoi(strings.Split(s.Find(`span[class="score"]`).Text(), " ")[0]) if score > 10 { fmt.Println(title, origin, url, score) } haveTitle = false } }) return nil }) }
func init() { WeatherSpider = spider.Get("http://forecast.weather.gov/MapClick.php?lat=35.9804&lon=-78.915&lg=english&&FcstType=digital", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("WeatherSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p WeatherData p.High = 0 p.Low = 100 p.Rain = 0 gotTemperature := false gotRain := false isTemperature := false isRain := false htmlparser.Find(`tr[align="center"]`).Each(func(i int, s *goquery.Selection) { // fmt.Println(s.Text()) s.Find(`td`).Each(func(j int, t *goquery.Selection) { if strings.TrimSpace(t.Text()) == "Thunder" || strings.TrimSpace(t.Text()) == "Dewpoint (°F)" { isRain = false isTemperature = false } if isTemperature == true { temp, _ := strconv.Atoi(t.Text()) if temp > p.High { p.High = temp } if temp < p.Low { p.Low = temp } } if strings.TrimSpace(t.Text()) == "Temperature (°F)" && gotTemperature == false { // fmt.Println(t.Text()) isTemperature = true gotTemperature = true } if isRain == true { if strings.TrimSpace(t.Text()) != "--" { rain, _ := strconv.Atoi(t.Text()) p.Rain += rain } } if strings.TrimSpace(t.Text()) == "Rain" && gotRain == false { // fmt.Println(t.Text()) isRain = true gotRain = true } }) // p.Data = append(p.Data, ScienceNews{title, summary, url}) }) // Open() err = p.save() // Close() if err != nil { return fmt.Errorf("error saving WeatherData") } return nil }) }
func init() { GroceriesSpider = spider.Get("http://cowyo.com/grocerylist", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("GroceriesSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p CowyoGroup p.Data = []CowyoItem{} dats := htmlparser.Find(`textarea `).Text() for _, dat := range strings.Split(dats, "\n") { if len(dat) > 0 { p.Data = append(p.Data, CowyoItem{dat}) } } err = p.save("GroceryList") if err != nil { return fmt.Errorf("error saving CowyoGroup") } return nil }) CoolListSpider = spider.Get("http://cowyo.com/cool", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("CoolListSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p CowyoGroup p.Data = []CowyoItem{} dats := htmlparser.Find(`textarea `).Text() for _, dat := range strings.Split(dats, "\n") { if len(dat) > 0 { p.Data = append(p.Data, CowyoItem{dat}) } } err = p.save("CoolList") if err != nil { return fmt.Errorf("error saving CowyoGroup") } return nil }) TodoListSpider = spider.Get("http://cowyo.com/todo", func(ctx *spider.Context) error { fmt.Print(time.Now()) fmt.Println("TodoListSpider") if _, err := ctx.DoRequest(); err != nil { return err } htmlparser, err := ctx.HTMLParser() if err != nil { return err } var p CowyoGroup p.Data = []CowyoItem{} dats := htmlparser.Find(`textarea `).Text() for _, dat := range strings.Split(dats, "\n") { if len(dat) > 0 { p.Data = append(p.Data, CowyoItem{dat}) } } err = p.save("TodoList") if err != nil { return fmt.Errorf("error saving TodoList") } return nil }) }