Example #1
func TestNotRanWhenStopped(t *testing.T) {
	ran := false
	testSpider := spider.Get("http://google.com", func(ctx *spider.Context) error {
		ran = true
		return nil
	})
	dur := 1*time.Second + 100*time.Millisecond
	stopCh := make(chan struct{})

	sched := spider.NewScheduler()
	sched.Add(schedule.Every(1*time.Second), testSpider)
	sched.Start()

	go func() {
		sched.Stop()
		stopCh <- struct{}{}
	}()

	select {
	case <-time.After(dur):
		t.Error("Should not wait to much")
	case <-stopCh:
		if ran {
			t.Error("Spider ran but should not run when stopped")
		}
	}
}
Example #2
func loadSelfSpider() {
	SelfSpider = spider.Get("http://"+ExternalIP+"/", func(ctx *spider.Context) error {

		fmt.Print(time.Now())
		fmt.Println("SelfSpider")
		if _, err := ctx.DoRequest(); err != nil {
			fmt.Println(err)
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			fmt.Println(err)
			return err
		}
		html, _ := htmlparser.Html()
		p := SelfData{html, time.Now().String()}

		err = p.save()
		if err != nil {
			return fmt.Errorf("error saving SelfData")
		}

		return nil
	})
}
Example #3
func init() {
	ScienceNewsSpider = spider.Get("http://www.sciencemag.org/news", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("ScienceNewsSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p ScienceNewsGroup
		p.Data = []ScienceNews{}
		htmlparser.Find(`div[class="media__body"]`).Each(func(i int, s *goquery.Selection) {
			title := strings.TrimSpace(s.Find("h2").Text())
			url, _ := s.Find("h2 > a").Attr("href")
			url = "http://www.sciencemag.org" + url
			summary := "None"
			p.Data = append(p.Data, ScienceNews{title, summary, url})
		})
		// Open()
		err = p.save()
		// Close()
		if err != nil {
			return fmt.Errorf("error saving ScienceNewsGroup")
		}
		return nil
	})
}
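ScienceNews, ScienceNewsGroup and the save method are defined elsewhere in the package. Judging from the composite literal ScienceNews{title, summary, url} and the p.Data slice, they presumably look roughly like the sketch below (field names and the save signature are assumptions):

// Hypothetical declarations inferred from the example above.
type ScienceNews struct {
	Title   string
	Summary string
	URL     string
}

type ScienceNewsGroup struct {
	Data []ScienceNews
}

// save is a stand-in; the real method persists the group somewhere.
func (g *ScienceNewsGroup) save() error { return nil }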
Example #4
func main() {
	wikiHTMLSpider := &WikipediaHTMLSpider{"Albert Einstein"}
	wikiJSONSpider := &WikipediaJSONSpider{"Lionel Messi"}

	spider.Add(schedule.Every(7*time.Second), wikiHTMLSpider)
	spider.Add(schedule.Every(9*time.Second), wikiJSONSpider)
	spider.Add(schedule.Every(5*time.Second), spider.Get("https://google.com", func(ctx *spider.Context) error {
		if _, err := ctx.DoRequest(); err != nil {
			return err
		}
		fmt.Println("Made request to google")
		return nil
	}))
	spider.Start()

	<-time.After(26 * time.Second)
}
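WikipediaHTMLSpider and WikipediaJSONSpider are custom spider types defined elsewhere. For a quick experiment, the same effect can be approximated with an inline spider.Get handler like the google.com one above; a minimal sketch that could sit inside main (the Wikipedia URL and the #mw-content-text selector are assumptions, not part of this example):

	// Sketch: fetch the article and print the first paragraph of the body.
	spider.Add(schedule.Every(7*time.Second), spider.Get("https://en.wikipedia.org/wiki/Albert_Einstein", func(ctx *spider.Context) error {
		if _, err := ctx.DoRequest(); err != nil {
			return err
		}
		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		// The selector is an assumption about Wikipedia's markup.
		fmt.Println(htmlparser.Find("#mw-content-text p").First().Text())
		return nil
	}))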
Example #5
func TestInMemory(t *testing.T) {
	ran := false
	testSpider := spider.Get("http://google.com", func(ctx *spider.Context) error {
		ran = true
		return nil
	})
	sched := spider.NewScheduler()
	sched.Add(schedule.Every(1*time.Second), testSpider)
	sched.Start()
	dur := 1*time.Second + 500*time.Millisecond

	<-time.After(dur)
	if !ran {
		t.Error("spider did not run")
	}
	sched.Stop()
}
Example #6
func init() {
	InTheatersSpider = spider.Get("http://www.metacritic.com/browse/movies/release-date/theaters/date", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("InTheatersSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p InTheatersGroup
		p.Data = []Movie{}
		htmlparser.Find(`div[class="product_wrap group_product_wrap"]`).Each(func(i int, s *goquery.Selection) {
			title := strings.TrimSpace(s.Find("a").Text())
			score, _ := strconv.Atoi(s.Find(`div[class*="metascore_w"]`).Text())
			date := strings.TrimSpace(s.Find(`span[class="data"]`).Text())
			curMonth := time.Now().Month().String()[:3]
			lastMonth := time.Now().AddDate(0, -1, 0).Month().String()[:3]
			if (strings.Contains(date, curMonth) || strings.Contains(date, lastMonth)) && score > 60 {
				urlComponent := strings.Join(strings.Split("showtimes "+title, " "), "+")
				url := "https://www.google.com/?q=" + urlComponent + "#safe=active&q=" + urlComponent
				// fmt.Printf("%d: %s %d %s %s\n", i, title, score, date, url)
				p.Data = append(p.Data, Movie{title, date, score, url})
			}
		})
		// Open()
		err = p.save()
		// Close()
		if err != nil {
			return fmt.Errorf("error saving InTheatersGroup")
		}
		return nil
	})
}
Example #7
func init() {
	ApNewsSpider = spider.Get("http://hosted.ap.org/dynamic/fronts/HOME?SITE=AP&SECTION=HOME", func(ctx *spider.Context) error {
		fmt.Println(time.Now())
		// Execute the request
		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		// Get goquery's html parser
		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}

		htmlparser.Find(`p[class="ap-newsbriefitem-p"]`).Each(func(i int, s *goquery.Selection) {
			title := s.Find("a").Text()
			link, _ := s.Find("a").Attr("href")
			summary := s.Find(`span[class="topheadlinebody"]`).Text()
			fmt.Printf("%d: %s (%s) - %s\n", i, title, link, summary)
		})
		return nil
	})
}
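ApNewsSpider only defines the crawl here; like the spiders in the other examples, it still has to be handed to a scheduler to run. A minimal sketch, e.g. from main (the 5-minute interval is an arbitrary assumption):

	// Sketch: register the spider and start the package-level scheduler.
	spider.Add(schedule.Every(5*time.Minute), ApNewsSpider)
	spider.Start()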
Example #8
func init() {
	HackerNewsSpider = spider.Get("https://news.ycombinator.com/", func(ctx *spider.Context) error {
		fmt.Println(time.Now())
		// Execute the request
		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		// Get goquery's html parser
		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}

		title := ""
		origin := ""
		url := ""
		score := 0
		haveTitle := false
		htmlparser.Find(`tr`).Each(func(i int, s *goquery.Selection) {
			if i%2 == 0 {
				// Even rows carry the title, the link and the origin site.
				title = s.Find(`td[class="title"] > a`).Text()
				origin = s.Find(`span[class="sitebit comhead"]`).Text()
				url, _ = s.Find(`td[class="title"] > a`).Attr("href")
				haveTitle = true
			} else if haveTitle {
				// Odd rows carry the subtext with the score.
				score, _ = strconv.Atoi(strings.Split(s.Find(`span[class="score"]`).Text(), " ")[0])
				if score > 10 {
					fmt.Println(title, origin, url, score)
				}
				haveTitle = false
			}
		})
		return nil
	})
}
Example #9
func init() {
	WeatherSpider = spider.Get("http://forecast.weather.gov/MapClick.php?lat=35.9804&lon=-78.915&lg=english&&FcstType=digital", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("WeatherSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p WeatherData
		p.High = 0
		p.Low = 100
		p.Rain = 0
		gotTemperature := false
		gotRain := false
		isTemperature := false
		isRain := false
		htmlparser.Find(`tr[align="center"]`).Each(func(i int, s *goquery.Selection) {
			// fmt.Println(s.Text())
			s.Find(`td`).Each(func(j int, t *goquery.Selection) {
				cell := strings.TrimSpace(t.Text())
				// "Thunder" and "Dewpoint (°F)" mark the end of the rain and
				// temperature rows, respectively.
				if cell == "Thunder" || cell == "Dewpoint (°F)" {
					isRain = false
					isTemperature = false
				}
				if isTemperature {
					temp, _ := strconv.Atoi(cell)
					if temp > p.High {
						p.High = temp
					}
					if temp < p.Low {
						p.Low = temp
					}
				}
				if cell == "Temperature (°F)" && !gotTemperature {
					isTemperature = true
					gotTemperature = true
				}

				if isRain && cell != "--" {
					rain, _ := strconv.Atoi(cell)
					p.Rain += rain
				}
				if cell == "Rain" && !gotRain {
					isRain = true
					gotRain = true
				}
			})

		})
		// Open()
		err = p.save()
		// Close()
		if err != nil {
			return fmt.Errorf("error saving WeatherData")
		}
		return nil
	})
}
Example #10
func init() {
	GroceriesSpider = spider.Get("http://cowyo.com/grocerylist", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("GroceriesSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p CowyoGroup
		p.Data = []CowyoItem{}
		dats := htmlparser.Find(`textarea `).Text()
		for _, dat := range strings.Split(dats, "\n") {
			if len(dat) > 0 {
				p.Data = append(p.Data, CowyoItem{dat})
			}
		}
		err = p.save("GroceryList")
		if err != nil {
			return fmt.Errorf("error saving CowyoGroup")
		}
		return nil
	})
	CoolListSpider = spider.Get("http://cowyo.com/cool", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("CoolListSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p CowyoGroup
		p.Data = []CowyoItem{}
		dats := htmlparser.Find(`textarea `).Text()
		for _, dat := range strings.Split(dats, "\n") {
			if len(dat) > 0 {
				p.Data = append(p.Data, CowyoItem{dat})
			}
		}
		err = p.save("CoolList")
		if err != nil {
			return fmt.Errorf("error saving CowyoGroup")
		}
		return nil
	})
	TodoListSpider = spider.Get("http://cowyo.com/todo", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("TodoListSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}

		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p CowyoGroup
		p.Data = []CowyoItem{}
		dats := htmlparser.Find(`textarea `).Text()
		for _, dat := range strings.Split(dats, "\n") {
			if len(dat) > 0 {
				p.Data = append(p.Data, CowyoItem{dat})
			}
		}
		err = p.save("TodoList")
		if err != nil {
			return fmt.Errorf("error saving TodoList")
		}
		return nil
	})
}
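The three cowyo spiders differ only in the URL, the log label and the name passed to save. One way to remove the duplication is a small handler factory; the sketch below is hypothetical and not part of the original package, but it reuses only the calls already shown in this example:

// cowyoListHandler returns the shared handler; only the list name varies.
func cowyoListHandler(name string) func(*spider.Context) error {
	return func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println(name + "Spider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}
		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}
		var p CowyoGroup
		p.Data = []CowyoItem{}
		for _, dat := range strings.Split(htmlparser.Find(`textarea`).Text(), "\n") {
			if len(dat) > 0 {
				p.Data = append(p.Data, CowyoItem{dat})
			}
		}
		if err := p.save(name); err != nil {
			return fmt.Errorf("error saving CowyoGroup %q", name)
		}
		return nil
	}
}

With that helper, the three assignments in init collapse to spider.Get("http://cowyo.com/grocerylist", cowyoListHandler("GroceryList")) and so on for the other two lists.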