示例#1
0
// main is the command-line entry point. It parses the -search, -input and
// -verbose flags, reads the list of URLs from the input file, runs the
// search over every URL after the header row, writes the results via
// writeFile, and logs the total execution time.
func main() {
	// Code used to profile the application.
	// cfg := profile.Config{
	// 	MemProfile: true,
	// 	CPUProfile: true,
	// }
	// p := profile.Start(&cfg)
	// defer p.Stop()

	// Record the start time of execution.
	start := time.Now()

	// Define flags for the input file, search term, and log level.
	term := flag.String("search", "", "required: please provide a search term")
	path := flag.String("input", "urls.txt", "enter the location of the file containing URLs")
	verbose := flag.Bool("verbose", false, "verbose logging option")
	flag.Parse()

	// Raise the log level when -verbose is set.
	// NOTE(review): 4 is presumably the logger's debug level — confirm
	// against the logging package and prefer its named constant if one exists.
	if *verbose {
		log.SetLevel(4)
	}

	// Read the input file.
	urls, err := readFile(*path)
	if err != nil {
		log.Fatal("go-search", "Error reading from urls file", "error", err)
	}

	// Drop the first item of the urls slice (the column name). Guard the
	// slice expression so an empty input file yields an empty work list
	// instead of a slice-bounds panic.
	if len(urls) > 0 {
		urls = urls[1:]
	}

	// Pass the search term and slice of URLs to the search method.
	results := search(*term, urls)

	// Write to the output file.
	if err := writeFile(results); err != nil {
		log.Fatal("go-search", "Error writing to results file", "error", err)
	}

	// Log the total execution time.
	log.Info("go-search", fmt.Sprintf("Search took %s", time.Since(start)))
}
示例#2
0
// search takes a search term and a slice of URLs, fetches the page
// content for each URL concurrently, and reports whether the page text
// contains the term (compared case-insensitively).
//
// At most maxReqs worker goroutines are started, and never more than
// len(urls). Exactly one result is produced per URL, in completion order
// rather than input order; a result carries the fetch or parse error when
// one occurred. If term is empty the function reports a fatal error
// through the logger.
func search(term string, urls []string) []result {
	// If no search term was provided, exit.
	if term == "" {
		log.Fatal("go-search", "No search term was provided. Expected arguments: '-search=searchTerm'.")
	}
	// Lowercase the search term so our comparisons will be case-insensitive.
	term = strings.ToLower(term)

	// ch carries work (urls) to the workers; done carries results back.
	// The WaitGroup tracks when all worker goroutines have finished.
	ch := make(chan string)
	done := make(chan result)
	var wg sync.WaitGroup

	// Create a single http Client with an 8 second timeout.
	// From the docs: "Clients should be reused instead of created as
	// needed. Clients are safe for concurrent use by multiple goroutines."
	client := &http.Client{
		Timeout: 8 * time.Second,
	}

	// Cap the number of workers at the number of urls to avoid spinning up
	// unnecessary goroutines. A local is used so the package-level maxReqs
	// is not permanently lowered for later calls.
	workers := maxReqs
	if workers > len(urls) {
		workers = len(urls)
	}

	// Decide once whether to print progress dots. The flag cannot change
	// while the search runs, and flag.Lookup returns nil when the flag was
	// never registered, in which case we behave as in non-verbose mode.
	quiet := true
	if v := flag.Lookup("verbose"); v != nil {
		quiet = v.Value.String() == "false"
	}

	// fetch downloads one site and searches its text for the term.
	fetch := func(site string) result {
		// Fetch the page content.
		response, err := client.Get("http://" + site)
		if err != nil {
			// If there are errors, try again with the 'www' host prefix.
			log.Debug("go-search", fmt.Sprintf("Initial request failed for %s, attempting 'www' prefix.", site), "error", err)

			response, err = client.Get("http://www." + site)
		}

		// If there are still errors, report the error for this site.
		if err != nil {
			log.Debug("go-search", fmt.Sprintf("Both requests failed for %s, returning an error.", site), "error", err)

			return result{site, false, err}
		}
		// The closure runs once per site, so this defer fires promptly.
		defer response.Body.Close()

		// Extract the human-readable text from the response.
		// Note that FromReader uses html.Parse under the hood,
		// which reads to EOF in the same manner as ioutil.ReadAll.
		// https://github.com/jaytaylor/html2text/blob/master/html2text.go#L167
		text, err := html2text.FromReader(response.Body)
		if err != nil {
			return result{site, false, err}
		}

		// Search for the search term in the page text.
		return result{site, strings.Contains(strings.ToLower(text), term), nil}
	}

	log.Info("go-search", "Fetching and searching urls...")
	log.Info("go-search", "Go ahead, queue up your favorite jam: this will take ~30 seconds")

	// Spin up 'workers' number of goroutines.
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()

			// Receive work until the channel is closed, which signals
			// that there is no more work to be done.
			for site := range ch {
				// Provide some visual feedback to the user for each url processed.
				if quiet {
					fmt.Print(".")
				}
				done <- fetch(site)
			}
		}()
	}

	// Send work to be processed as goroutines become available, then close
	// the channel (from the sending side) so the workers can exit.
	go func() {
		for _, site := range urls {
			log.Debug("go-search", fmt.Sprintf("Sending work: %s", site))
			ch <- site
		}
		close(ch)
	}()

	// Receive exactly one result per url on the done chan.
	results := make([]result, 0, len(urls))
	for i := 0; i < len(urls); i++ {
		res := <-done
		log.Debug("go-search", fmt.Sprintf("Receiving result: %s", res.site))
		results = append(results, res)
	}

	// Wait for the goroutines to be done processing.
	wg.Wait()

	fmt.Print("Done!\n")
	return results
}