Esempio n. 1
0
// main demonstrates the URL queue: it enqueues nine numbered entries, then
// drains the queue, printing each entry's URL and depth together with the
// queue size after the pop. The first error returned by Pop ends the program.
func main() {
	var urls queue.UrlQueue

	// Fill the queue with entries 1 through 9, using the counter as the depth.
	for n := 1; n <= 9; n++ {
		label := fmt.Sprintf("URL: %d", n)
		urls.Push(label, n)
	}

	// Drain until Pop reports an error.
	for {
		item, popErr := urls.Pop()
		if popErr != nil {
			fmt.Println("Queue cleared.")
			return
		}
		fmt.Println(item.Url, item.Depth, urls.Size)
	}
}
Esempio n. 2
0
// BasicCrawler crawls outward from baseUrl with at most THREAD_NUMBER crawl
// goroutines running at once and returns the collected results.
//
// Every URL handed to a worker is recorded in the result map with the
// placeholder value "OK"; when a worker reports a RespItem, the entry for
// that item's Url is set to its RespBody. A URL reported by a worker is
// queued for crawling only if its Depth is below maxDepth and GoThere
// returns true for it. A URL already present in the result map is never
// dispatched again.
//
// The function returns once the queue is empty and no worker is running.
//
// NOTE(review): assumes THREAD_NUMBER > 0 and that every crawl goroutine
// eventually sends exactly one RespItem, after all of its URL sends; confirm
// against crawl. Otherwise the loop cannot make progress.
func BasicCrawler(baseUrl string, maxDepth int, GoThere func(string) bool) map[string]string {
	// Result map: URL -> response body (or "OK" while still in flight).
	resUrls := make(map[string]string)
	// Workers report newly discovered URLs on chUrl.
	chUrl := make(chan queue.UrlItem)
	// Workers report their finished response on chResp.
	chResp := make(chan RespItem)
	defer func() {
		close(chUrl)
		close(chResp)
	}()

	q := queue.UrlQueue{}
	// Seed the queue with the start URL at depth 0.
	q.Push(baseUrl, 0)

	// Number of crawl goroutines that have not yet reported on chResp.
	running := 0

	// Keep going while there is queued work or a worker still in flight.
	for q.Size > 0 || running > 0 {
		// Dispatch phase: start workers until the queue is empty or the
		// concurrency limit is reached.
		if q.Size > 0 && running < THREAD_NUMBER {
			ui, err := q.Pop()
			if err != nil {
				// Pop is not expected to fail while Size > 0; retry.
				continue
			}
			if _, visited := resUrls[ui.Url]; visited {
				continue
			}
			resUrls[ui.Url] = "OK"
			go crawl(ui, chUrl, chResp)
			running++
			continue
		}

		// Nothing can be dispatched right now, so only a message from a
		// worker can change the state. Block until one arrives instead of
		// spinning on a non-blocking select.
		select {
		case found := <-chUrl:
			if found.Depth < maxDepth && GoThere(found.Url) {
				q.Push(found.Url, found.Depth)
			}
		case ri := <-chResp:
			resUrls[ri.Url] = ri.RespBody
			running--
		}
	}
	return resUrls
}