Example #1
0
func main() {
	// Build the URL container: crawling is restricted to the IP prefixes
	// listed in is-prefixes.txt, seeded with URLs from is-url_seeds.txt.
	ipFilter := bslc.NewIPNetContainer(readFile("is-prefixes.txt"))
	seeds := readFile("is-url_seeds.txt")
	container := bslc.NewLocalURLContainer(ipFilter, seeds)

	// Configure the crawler over that container.
	crawler := bslc.Crawler{
		URLs: container,
		MaxConcurrentConnections: 20,
	}

	// Channel on which the crawler delivers image content.
	contentCh := make(chan *bslc.Content)
	crawler.AddMimeTypes(mimetypes.Images, contentCh)

	// Spin up one handler goroutine per crawler connection.
	var handlers sync.WaitGroup
	for n := 0; n < crawler.MaxConcurrentConnections; n++ {
		handlers.Add(1)
		go func() {
			defer handlers.Done()
			for c := range contentCh {
				// Log and persist each received item.
				log.Println("Received URL: ", c.URL.String())
				saveFile("saved", c)
				// Tell the crawler we are finished with this item.
				c.Done <- true
			}
		}()
	}

	// Run the crawl and block until every handler has drained the channel.
	crawler.StartCrawling()
	handlers.Wait()
}
Example #2
0
func main() {
	// Initialize a crawler whose URL container is filtered by the given
	// IP prefixes and seeded with the given URLs.
	crawler := bslc.Crawler{
		URLs: bslc.NewLocalURLContainer(bslc.NewIPNetContainer(prefixes), seedUrls),
		MaxConcurrentConnections: 5,
	}

	// Register mimetype handler channel with crawler
	ch := make(chan *bslc.Content)
	crawler.AddMimeTypes(exeTypes, ch)

	// Start content handlers
	wg := sync.WaitGroup{}
	for i := 0; i < crawler.MaxConcurrentConnections; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for content := range ch {
				// Save only files with a ".exe" extension.
				// FIX: ".exe" is a filename suffix, not a prefix —
				// HasPrefix would essentially never match.
				if strings.HasSuffix(strings.ToLower(content.Filename), ".exe") {
					saveFile("saved", content)
				}

				// Signal when done
				content.Done <- true
			}
		}()
	}

	// Start crawling and wait until done
	crawler.StartCrawling()
	wg.Wait()
}
Example #3
0
// Example demonstrates crawling the loopback network for audio content.
func Example() {
	// URL container limited to loopback addresses, seeded with one URL.
	localNet := bslc.NewIPNetContainer([]string{"127.0.0.0/8"})
	seeds := []string{"http://127.0.0.1/"}
	container := bslc.NewLocalURLContainer(localNet, seeds)

	// Crawler configured over that container.
	crawler := bslc.Crawler{
		URLs: container,
		MaxConcurrentConnections: 5,
	}

	// Channel on which the crawler delivers audio content.
	contentCh := make(chan *bslc.Content)
	crawler.AddMimeTypes(mimetypes.Audio, contentCh)

	// One handler goroutine per crawler connection.
	var handlers sync.WaitGroup
	for n := 0; n < crawler.MaxConcurrentConnections; n++ {
		handlers.Add(1)
		go func() {
			defer handlers.Done()
			for c := range contentCh {
				// Report each received item.
				fmt.Println("Received content from URL: ", c.URL.String())

				// Tell the crawler we are finished with this item.
				c.Done <- true
			}
		}()
	}

	// Run the crawl and block until all handlers finish.
	crawler.StartCrawling()
	handlers.Wait()
}