func main() {
	q := queue.UrlQueue{}
	// Push nine numbered items onto the queue with increasing depth.
	for i := 1; i < 10; i++ {
		q.Push(fmt.Sprintf("URL: %d", i), i)
	}
	// Drain the queue until Pop reports it is empty.
	for {
		a, err := q.Pop()
		if err != nil {
			fmt.Println("Queue cleared.")
			break
		}
		fmt.Println(a.Url, a.Depth, q.Size)
	}
}
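For reference, here is a minimal sketch of what the queue package exercised above might look like. It is an assumption reconstructed only from the calls main makes (Push, Pop, the Size field) and is not the original implementation.

// Hypothetical sketch of the queue package (assumed, not the original code):
// a simple FIFO of UrlItem values with an exported Size counter.
package queue

import "errors"

// UrlItem holds a URL together with the depth at which it was found.
type UrlItem struct {
	Url   string
	Depth int
}

// UrlQueue is a minimal FIFO queue of UrlItem values.
type UrlQueue struct {
	items []UrlItem
	Size  int
}

// Push appends a URL with its depth to the back of the queue.
func (q *UrlQueue) Push(url string, depth int) {
	q.items = append(q.items, UrlItem{Url: url, Depth: depth})
	q.Size++
}

// Pop removes and returns the front item, or an error if the queue is empty.
func (q *UrlQueue) Pop() (UrlItem, error) {
	if q.Size == 0 {
		return UrlItem{}, errors.New("queue is empty")
	}
	item := q.items[0]
	q.items = q.items[1:]
	q.Size--
	return item, nil
}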
func BasicCrawler(baseUrl string, maxDepth int, GoThere func(string) bool) map[string]string {
	// Map of visited URLs to their downloaded response bodies
	resUrls := make(map[string]string)
	// Channel on which downloaders report discovered URLs
	chUrl := make(chan queue.UrlItem)
	// Channel on which downloaders report finished responses
	chResp := make(chan RespItem)
	defer func() {
		close(chUrl)
		close(chResp)
	}()

	q := queue.UrlQueue{}
	// Seed the queue with the base URL at depth 0
	q.Push(baseUrl, 0)
	// Number of crawl goroutines currently running
	running := 0

	// Main loop for the concurrent crawler
	for {
		// Queue empty and no goroutines running: the crawl is complete
		if q.Size == 0 && running == 0 {
			break
		}
		// Start another downloader if there is work and a free slot
		if q.Size > 0 && running < THREAD_NUMBER {
			ui, err := q.Pop()
			if err != nil {
				continue
			}
			if _, visited := resUrls[ui.Url]; visited {
				continue
			}
			// Mark as visited before the body arrives
			resUrls[ui.Url] = "OK"
			go crawl(ui, chUrl, chResp)
			running++
			//fmt.Println("running: ", running)
		}
		select {
		case url := <-chUrl:
			// Queue newly discovered URLs that are within depth and allowed
			if url.Depth < maxDepth && GoThere(url.Url) {
				q.Push(url.Url, url.Depth)
			}
		case ri := <-chResp:
			// A downloader finished; record its body and free the slot
			resUrls[ri.Url] = ri.RespBody
			running--
		default:
			continue
		}
	}
	return resUrls
}
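BasicCrawler also relies on a RespItem type, a THREAD_NUMBER limit, and a crawl worker that are not shown here. The sketch below is a hypothetical version of those pieces, consistent with how the channels are used above (discovered links go to chUrl with an incremented depth, the finished body goes to chResp); the link extraction and error handling are assumptions, not the original code. It sits in the same package as BasicCrawler and needs "io", "net/http", and "regexp" imported.

// THREAD_NUMBER caps the number of crawl goroutines running at once (assumed value).
const THREAD_NUMBER = 4

// RespItem carries the downloaded body for a single URL.
type RespItem struct {
	Url      string
	RespBody string
}

// hrefRe is a deliberately naive link matcher used only for this sketch.
var hrefRe = regexp.MustCompile(`href="(https?://[^"]+)"`)

// extractLinks pulls absolute href values out of an HTML body.
func extractLinks(body string) []string {
	var links []string
	for _, m := range hrefRe.FindAllStringSubmatch(body, -1) {
		links = append(links, m[1])
	}
	return links
}

// crawl downloads ui.Url, reports every link it finds on chUrl with an
// incremented depth, and finally reports the page body on chResp.
func crawl(ui queue.UrlItem, chUrl chan queue.UrlItem, chResp chan RespItem) {
	resp, err := http.Get(ui.Url)
	if err != nil {
		chResp <- RespItem{Url: ui.Url, RespBody: err.Error()}
		return
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		chResp <- RespItem{Url: ui.Url, RespBody: err.Error()}
		return
	}

	for _, link := range extractLinks(string(body)) {
		chUrl <- queue.UrlItem{Url: link, Depth: ui.Depth + 1}
	}
	chResp <- RespItem{Url: ui.Url, RespBody: string(body)}
}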