func enqueue(uri string, queue chan string) { fmt.Println("fetching", uri) visited[uri] = true //将要访问的uri放进visited,并赋值为true tlsConfig := &tls.Config{ //&的意思:生成一个tls.Config对象,其InsecureSkipVerify值设为true InsecureSkipVerify: true, } transport := &http.Transport{ TLSClientConfig: tlsConfig, } client := http.Client{Transport: transport} resp, err := client.Get(uri) if err != nil { return } defer resp.Body.Close() //defer等上面的client.Get执行完毕后关闭连接 links := collectlinks.All(resp.Body) //links是一个包含了二级超链接的slice for _, link := range links { absolute := fixUrl(link, uri) if uri != "" { if !visited[absolute] { //如果absolute在visited里不为true,就将absolute放进queue channle go func() { queue <- absolute }() } //二级超链接下可能还有下级链接,所以放到queue中继续执行enqueue。 } } }
func retrieveQueue(client *http.Client, uri string, queue chan string) { fmt.Println("fetching: " + uri + " ...") var added = make(map[string]bool) resp, err := client.Get(uri) if err != nil { return } defer resp.Body.Close() added[uri] = true // fmt.Println("uri visited is: ", uri) // fmt.Println("type of uri: ", reflect.TypeOf(uri)) links := collectlinks.All(resp.Body) for _, link := range links { absolute := fixURL(link, uri) // fmt.Println("ab visited is: ", absolute) if absolute != "" && !added[absolute] { // fmt.Println("type of ab: ", reflect.TypeOf(absolute)) added[absolute] = true go func() { queue <- absolute }() } } }
func main() { flag.Parse() args := flag.Args() fmt.Println(args) //if len(args) < 1 { // fmt.Println("Please specify start page") // os.Exit(1) //} tlsConfig := &tls.Config{ // The &thing{a: b} syntax is equivalent to InsecureSkipVerify: true, // new(thing(a: b)) in other languages. } // It gives you a new 'thing' object (in this // case a new 'tls.Config' object) and sets the // 'a' attribute to a value of 'b'. transport := &http.Transport{ // And we take that tlsConfig object we instantiated TLSClientConfig: tlsConfig, // and use it as the value for another new object's } // 'TLSClientConfig' attribute. client := http.Client{Transport: transport} // Go typicaly gives you sane defaults (like 'http.Get') // and also provides a way to override them. resp, err := client.Get("https://www.reddit.com/r/montreal") // this line is basically the same as before, only if err != nil { // we're calling 'Get' on a customized client rather return // than the 'http' package directly. } defer resp.Body.Close() links := collectlinks.All(resp.Body) for _, link := range links { fmt.Println(link) } }
func enqueue(uri string, queue chan string) { fmt.Println("fetching", uri) visited[uri] = true tlsConfig := &tls.Config{ InsecureSkipVerify: true, } transport := &http.Transport{ TLSClientConfig: tlsConfig, } client := http.Client{Transport: transport} resp, err := client.Get(uri) if err != nil { return } defer resp.Body.Close() links := collectlinks.All(resp.Body) for _, link := range links { absolute := fixUrl(link, uri) if uri != "" && !visited[absolute] { go func() { queue <- absolute }() } } }
func main() { startTime := time.Now() flag.Parse() if len(*mes) == 0 { log.Fatalf("Mês de referência inválido:%s. Formato esperado: mm/yyyy.", *mes) } param := fmt.Sprintf("parametros[]=%s¶metros[]=¶metros[]=¶metros[]=", *mes) resp, err := http.Post( urlConsulta, "application/x-www-form-urlencoded", bytes.NewBufferString(param)) if err != nil { log.Fatal(err) } defer resp.Body.Close() if err != nil { log.Fatal(err) } links := make(chan string, *maxWorkers) results := make(chan string, *maxWorkers) var wg sync.WaitGroup for i := 0; i < *maxWorkers; i++ { wg.Add(1) go func() { defer wg.Done() doWork(links, results) }() } // Work is done, close the result channel. go func() { wg.Wait() close(results) }() // Fill up work queue. go func() { for _, link := range collectlinks.All(resp.Body) { links <- link } close(links) }() // Print results. for row := range results { fmt.Println(row) } fmt.Fprintf(os.Stderr, "\nFinished! Duration: %s\n", time.Now().Sub(startTime)) }
func enqueue(url string, q chan string) { fmt.Println("fetching", url) res, err := http.Get(url) if err != nil { return } defer res.Body.Close() visited[url] = true links := collectlinks.All(res.Body) for _, link := range links { aURL := getAbsoluteURL(link, url) if !visited[aURL] { go func() { q <- aURL }() } } }
func enqueue(uri string, queue chan string) { fmt.Println("fetching", uri) depth++ fmt.Println("depth", depth) visited[uri] = true defer func() { depth-- }() if depth > 1 { return } transport := &http.Transport{ TLSClientConfig: &tls.Config{ InsecureSkipVerify: true, }, } client := http.Client{Transport: transport} resp, err := client.Get(uri) if err != nil { return } defer resp.Body.Close() links := collectlinks.All(resp.Body) for _, link := range links { absolute := fixUrl(link, uri) //fmt.Println("absolute", absolute) if (uri != "") && (strings.HasPrefix(absolute, uri)) { if !visited[absolute] { go func() { queue <- absolute fmt.Println("added", absolute) }() } } } fmt.Println("exit", uri) }
func enqueue(uri string, queue chan string) { if uri == end { fmt.Println("Target found: " + uri) close(queue) return } if strings.Contains(uri, "en.wikipedia.org/wiki/") { fmt.Println(count, uri) count++ visited[uri] = true transport := &http.Transport{ TLSClientConfig: &tls.Config{ InsecureSkipVerify: true, }, } client := http.Client{Transport: transport} resp, err := client.Get(uri) if err != nil { return } defer resp.Body.Close() links := collectlinks.All(resp.Body) for _, link := range links { absolute := fixUrl(link, uri) if uri != "" { if !visited[absolute] { go func() { queue <- absolute }() } } } } return }
func retrieve(uri string, depth int, wg *sync.WaitGroup) { defer wg.Done() if depth <= 0 { fmt.Sprintf("<- Done with %v, depth 0.\n", uri) return } fmt.Println("fetching ", uri) resp, err := http.Get(uri) if err != nil { return } defer resp.Body.Close() links := collectlinks.All(resp.Body) for _, link := range links { absolute := parseUrl(link, uri) if !visited[absolute] { wg.Add(1) go retrieve(absolute, depth-1, wg) } } return }