Esempio n. 1
0
// newTestSiteLinksOnly returns a *domain.Site whose Url is url and whose
// Links holds one site per entry of links, each created with domain.NewSite
// and kept in the order given. Only the Url and Links fields of the returned
// site are set; the top-level site is built as a literal, not via
// domain.NewSite.
func newTestSiteLinksOnly(url string, links ...string) *domain.Site {
	children := make([]*domain.Site, 0, len(links))
	for _, link := range links {
		children = append(children, domain.NewSite(link))
	}

	site := &domain.Site{Url: url, Links: children}
	return site
}
Esempio n. 2
0
// Build creates a sitemap rooted at startingURL and returns it once every
// worker goroutine has stopped.
//
// startingURL is passed through translateLink (with an empty first argument)
// and wrapped in a domain.Site that becomes the top of the sitemap. That site
// is queued on an internal site channel, which builder.workers goroutines
// drain; each received site is handed to builder.procesSite together with the
// response channel. A separate goroutine runs builder.processResponses with
// the sitemap and both channels (presumably it records results and queues
// newly discovered sites — confirm against processResponses).
//
// Each worker waits until at least builder.rate has elapsed since it last
// started waiting for a site, and exits when either builder.interruptChannel
// yields a value or no site arrives within builder.timeout.
func (builder *SitemapBuilder) Build(startingURL string) *domain.Sitemap {
	var workers sync.WaitGroup

	siteChannel := make(chan *domain.Site, builder.workers*5)
	responseChannel := make(chan *linkResponse, builder.workers)

	// Deferred calls run in reverse order after workers.Wait returns:
	// responseChannel is closed first, then siteChannel.
	// NOTE(review): processResponses receives siteChannel and may still be
	// sending on it when it is closed here, which would panic in that
	// goroutine — confirm against processResponses.
	defer close(siteChannel)
	defer close(responseChannel)

	url := translateLink("", startingURL)
	top := domain.NewSite(url)
	sitemap := domain.NewSitemap(top)

	go builder.processResponses(sitemap, responseChannel, siteChannel)

	// Seed the queue with the starting site.
	siteChannel <- top

	for i := 0; i < builder.workers; i++ {
		workers.Add(1)
		go func(index int) {
			defer workers.Done()

			// Start far in the past so the first iteration never waits on the rate limit.
			last := time.Date(1980, time.January, 1, 1, 1, 1, 1, time.UTC)
		loop:
			for {
				// Rate limit: sleep for whatever is left of builder.rate,
				// but stay responsive to an interrupt while sleeping.
				if remaining := builder.rate - time.Since(last); remaining > 0 {
					pause := time.NewTimer(remaining)
					select {
					case <-builder.interruptChannel:
						pause.Stop()
						break loop
					case <-pause.C:
					}
				}
				last = time.Now() // reset

				// Use an explicit timer instead of time.After so it can be
				// stopped as soon as a site arrives; otherwise one pending
				// timer per processed site would linger for builder.timeout.
				idle := time.NewTimer(builder.timeout)
				select {
				case <-builder.interruptChannel:
					idle.Stop()
					log.Println("Interrupt:", index)
					break loop
				case site := <-siteChannel:
					idle.Stop()
					builder.procesSite(site, responseChannel)
				case <-idle.C:
					// No work showed up within builder.timeout: this worker is done.
					log.Println("Timeout:", index)
					break loop
				}
			}
		}(i)
	}

	workers.Wait()

	return sitemap
}