예제 #1
0
파일: crawler.go 프로젝트: gachoyi/user
// enqueue fetches uri, collects the links found in the response body and
// sends every link that is not yet marked in the package-level visited map
// to queue.
//
// uri is recorded in visited before the request is made, so it counts as
// seen even when the fetch fails. Fetch errors are not reported; the
// function simply returns.
func enqueue(uri string, queue chan string) {
	fmt.Println("fetching", uri)
	visited[uri] = true // mark the page as seen before requesting it

	// Build a client whose TLS configuration has InsecureSkipVerify set, so
	// certificate verification is skipped for HTTPS requests.
	tlsConfig := &tls.Config{
		InsecureSkipVerify: true,
	}
	transport := &http.Transport{
		TLSClientConfig: tlsConfig,
	}
	client := http.Client{Transport: transport}

	resp, err := client.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close() // close the response body when enqueue returns

	links := collectlinks.All(resp.Body) // links found on the fetched page
	for _, link := range links {
		absolute := fixUrl(link, uri)
		// Test the resolved link rather than uri: uri is necessarily
		// non-empty once the request above has succeeded, whereas an empty
		// absolute could never be fetched and must not be queued.
		if absolute == "" {
			continue
		}
		if !visited[absolute] {
			// Send from a separate goroutine so this loop never blocks on
			// the channel. The linked page may contain further links, so it
			// goes back through the queue to be processed in turn.
			go func() { queue <- absolute }()
		}
	}
}
예제 #2
0
파일: krawl.go 프로젝트: sim4life/go_poc
// retrieveQueue downloads uri with client, collects the links found in the
// response body and sends each distinct, non-empty resolved link to queue.
//
// Duplicates are tracked only for the duration of this call: uri itself and
// every link already sent are remembered in a local set. If the request
// fails the function returns without sending anything.
func retrieveQueue(client *http.Client, uri string, queue chan string) {
	fmt.Println("fetching: " + uri + " ...")

	resp, err := client.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	// The page being fetched is pre-seeded so links back to it are skipped.
	seen := map[string]bool{uri: true}

	for _, href := range collectlinks.All(resp.Body) {
		target := fixURL(href, uri)
		if target == "" || seen[target] {
			continue
		}
		seen[target] = true
		// Send asynchronously so the loop does not block on the channel.
		go func(u string) { queue <- u }(target)
	}
}
예제 #3
0
// main downloads https://www.reddit.com/r/montreal and prints every link
// found in the page, one per line.
//
// The positional command-line arguments are parsed and echoed, but they are
// not validated or otherwise used; the start page is fixed. If the request
// fails, main returns without printing an error.
func main() {
	flag.Parse()
	fmt.Println(flag.Args())

	// Instead of the package-level http.Get, use a customised client: its
	// transport carries a TLS configuration with InsecureSkipVerify set, so
	// certificate verification is skipped.
	client := http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}

	resp, err := client.Get("https://www.reddit.com/r/montreal")
	if err != nil {
		return
	}
	defer resp.Body.Close()

	for _, href := range collectlinks.All(resp.Body) {
		fmt.Println(href)
	}
}
예제 #4
0
파일: crawl.go 프로젝트: okamuuu/awesome-go
// enqueue fetches uri, collects the links in the response body and sends
// each one that is not yet marked in the package-level visited map to queue.
//
// uri is marked as visited before the request is issued, so a failed fetch
// still counts as seen. Request errors are not reported.
func enqueue(uri string, queue chan string) {
	fmt.Println("fetching", uri)
	visited[uri] = true

	// The client's TLS configuration sets InsecureSkipVerify, so certificate
	// verification is skipped for HTTPS requests.
	tlsConfig := &tls.Config{
		InsecureSkipVerify: true,
	}
	transport := &http.Transport{
		TLSClientConfig: tlsConfig,
	}

	client := http.Client{Transport: transport}

	resp, err := client.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close()
	links := collectlinks.All(resp.Body)

	for _, link := range links {
		absolute := fixUrl(link, uri)
		// The emptiness check belongs on the resolved link: uri cannot be
		// empty after a successful Get, while an empty absolute could never
		// be fetched and should not be queued.
		if absolute != "" && !visited[absolute] {
			// Send from a goroutine so the loop never blocks on the channel.
			go func() { queue <- absolute }()
		}
	}
}
예제 #5
0
파일: main.go 프로젝트: danielfireman/phd
// main posts the reference month held in *mes to urlConsulta, feeds every
// link found in the response to *maxWorkers goroutines running doWork, and
// prints each result row to standard output. The elapsed time is written to
// standard error at the end.
//
// The program aborts via log.Fatal when the month is empty or the POST
// request fails.
func main() {
	startTime := time.Now()

	flag.Parse()

	if len(*mes) == 0 {
		log.Fatalf("Mês de referência inválido:%s. Formato esperado: mm/yyyy.", *mes)
	}

	param := fmt.Sprintf("parametros[]=%s&parametros[]=&parametros[]=&parametros[]=", *mes)
	resp, err := http.Post(
		urlConsulta,
		"application/x-www-form-urlencoded",
		bytes.NewBufferString(param))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	links := make(chan string, *maxWorkers)
	results := make(chan string, *maxWorkers)

	// Start the workers; each one runs doWork on the shared channels until
	// doWork returns.
	var wg sync.WaitGroup
	for i := 0; i < *maxWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			doWork(links, results)
		}()
	}

	// Once every worker has finished, close the result channel so the
	// printing loop below terminates.
	go func() {
		wg.Wait()
		close(results)
	}()

	// Fill up the work queue, then close it to signal that no more links
	// are coming.
	go func() {
		for _, link := range collectlinks.All(resp.Body) {
			links <- link
		}
		close(links)
	}()

	// Print results as they arrive.
	for row := range results {
		fmt.Println(row)
	}

	fmt.Fprintf(os.Stderr, "\nFinished! Duration: %s\n", time.Since(startTime))
}
예제 #6
0
// enqueue downloads url, marks it in the package-level visited map and
// sends every link on the page that is not yet marked to q.
//
// The page is marked as visited only after a successful request; when the
// request fails the function returns without touching visited or q.
func enqueue(url string, q chan string) {
	fmt.Println("fetching", url)

	resp, err := http.Get(url)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	visited[url] = true

	for _, href := range collectlinks.All(resp.Body) {
		target := getAbsoluteURL(href, url)
		if visited[target] {
			continue
		}
		// Send asynchronously so the loop does not block on the channel.
		go func() { q <- target }()
	}
}
예제 #7
0
// enqueue fetches uri and sends to queue every not-yet-visited link on the
// page whose resolved form starts with uri.
//
// The package-level depth counter is incremented on entry and decremented
// again when the function returns. If the counter exceeds 1 after the
// increment, the call returns before fetching anything. uri is marked in
// the package-level visited map in either case.
func enqueue(uri string, queue chan string) {
	fmt.Println("fetching", uri)

	depth++
	fmt.Println("depth", depth)

	visited[uri] = true
	defer func() { depth-- }()

	if depth > 1 {
		return
	}

	// The client's TLS configuration sets InsecureSkipVerify, so certificate
	// verification is skipped.
	client := http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}

	resp, err := client.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	for _, href := range collectlinks.All(resp.Body) {
		target := fixUrl(href, uri)
		if uri == "" || !strings.HasPrefix(target, uri) || visited[target] {
			continue
		}
		// Send asynchronously; the message is printed once the send has
		// completed.
		go func() {
			queue <- target
			fmt.Println("added", target)
		}()
	}

	fmt.Println("exit", uri)
}
예제 #8
0
파일: wiki.go 프로젝트: godwhoa/1cpd
// enqueue processes one URL taken from queue.
//
// When uri equals the package-level end value, the target has been reached:
// a message is printed and queue is closed. Otherwise, if uri contains
// "en.wikipedia.org/wiki/", the page is fetched (after printing and
// incrementing the package-level count and marking uri in visited) and each
// link on it that is not yet visited is sent to queue. Any other uri is
// ignored. Fetch errors are not reported.
func enqueue(uri string, queue chan string) {
	if uri == end {
		fmt.Println("Target found: " + uri)
		close(queue)
		return
	}

	if !strings.Contains(uri, "en.wikipedia.org/wiki/") {
		return
	}

	fmt.Println(count, uri)
	count++
	visited[uri] = true

	// The client's TLS configuration sets InsecureSkipVerify, so certificate
	// verification is skipped.
	transport := &http.Transport{
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify: true,
		},
	}
	client := http.Client{Transport: transport}
	resp, err := client.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	links := collectlinks.All(resp.Body)

	for _, link := range links {
		absolute := fixUrl(link, uri)
		// Check the resolved link, not uri: uri is known to be non-empty
		// here, while an empty absolute is never worth queueing.
		if absolute == "" || visited[absolute] {
			continue
		}
		go func() {
			// A later call may close queue (target found) while this send
			// is still pending, and sending on a closed channel panics.
			// Recover so a pending sender cannot crash the program after
			// the search has finished.
			defer func() { recover() }()
			queue <- absolute
		}()
	}
}
예제 #9
0
// retrieve fetches uri and recursively starts a new goroutine running
// retrieve, with depth reduced by one, for every link on the page that is
// not marked in the package-level visited map.
//
// wg must have been incremented once for this call before it starts;
// retrieve calls wg.Done when it returns and wg.Add(1) before each goroutine
// it launches. Recursion stops when depth reaches zero. Fetch errors are not
// reported.
//
// NOTE(review): this function only reads visited and never writes to it, and
// it runs concurrently in many goroutines. Confirm where visited is
// populated and that access to it is synchronised.
func retrieve(uri string, depth int, wg *sync.WaitGroup) {
	defer wg.Done()
	if depth <= 0 {
		// Printf rather than Sprintf: the formatted message was previously
		// built and discarded, so it never reached the output.
		fmt.Printf("<- Done with %v, depth 0.\n", uri)
		return
	}
	fmt.Println("fetching ", uri)
	resp, err := http.Get(uri)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	links := collectlinks.All(resp.Body)
	for _, link := range links {
		absolute := parseUrl(link, uri)
		if !visited[absolute] {
			// Register the child before launching it so a concurrent
			// wg.Wait cannot return too early.
			wg.Add(1)
			go retrieve(absolute, depth-1, wg)
		}
	}
}