func main() {
	// Initialize URL container with an IP prefix filter and seed URLs
	allowedNetworks := bslc.NewIPNetContainer(readFile("is-prefixes.txt"))
	seedURLs := readFile("is-url_seeds.txt")
	urls := bslc.NewLocalURLContainer(allowedNetworks, seedURLs)

	// Initialize crawler
	crawler := bslc.Crawler{
		URLs:                     urls,
		MaxConcurrentConnections: 20,
	}

	// Register mimetype handler channel with crawler
	ch := make(chan *bslc.Content)
	crawler.AddMimeTypes(mimetypes.Images, ch)

	// Start content handlers
	wg := sync.WaitGroup{}
	for i := 0; i < crawler.MaxConcurrentConnections; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for content := range ch {
				// Process received content
				log.Println("Received URL: ", content.URL.String())
				saveFile("saved", content)

				// Signal when done with content
				content.Done <- true
			}
		}()
	}

	// Start crawling and wait until done
	crawler.StartCrawling()
	wg.Wait()
}
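// readFile and saveFile are plain helper functions used by these examples;
// they are not part of the bslc API and their implementations are not shown
// here. A minimal sketch of readFile, assuming it returns one entry per
// non-empty line of the named file (matching the []string arguments expected
// by NewIPNetContainer and NewLocalURLContainer), could look like this.
// saveFile would similarly write the received payload below the given
// directory, but its body depends on how bslc.Content exposes the downloaded
// data, which is not visible in these examples.
func readFile(name string) []string {
	data, err := os.ReadFile(name)
	if err != nil {
		log.Fatal(err)
	}
	var lines []string
	for _, line := range strings.Split(string(data), "\n") {
		if line = strings.TrimSpace(line); line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}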
func main() {
	// Initialize crawler
	crawler := bslc.Crawler{
		URLs:                     bslc.NewLocalURLContainer(bslc.NewIPNetContainer(prefixes), seedUrls),
		MaxConcurrentConnections: 5,
	}

	// Register mimetype handler channel with crawler
	ch := make(chan *bslc.Content)
	crawler.AddMimeTypes(exeTypes, ch)

	// Start content handlers
	wg := sync.WaitGroup{}
	for i := 0; i < crawler.MaxConcurrentConnections; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for content := range ch {
				// Process received content: save only files with an .exe extension
				if strings.HasSuffix(strings.ToLower(content.Filename), ".exe") {
					saveFile("saved", content)
				}

				// Signal when done
				content.Done <- true
			}
		}()
	}

	// Start crawling and wait until done
	crawler.StartCrawling()
	wg.Wait()
}
func Example() {
	// Initialize URL container with IPNet filter and add seed URLs
	allowedNetworks := bslc.NewIPNetContainer([]string{"127.0.0.0/8"})
	seedUrls := []string{"http://127.0.0.1/"}
	urls := bslc.NewLocalURLContainer(allowedNetworks, seedUrls)

	// Initialize crawler
	crawler := bslc.Crawler{
		URLs:                     urls,
		MaxConcurrentConnections: 5,
	}

	// Register mimetype handler channel with crawler
	ch := make(chan *bslc.Content)
	crawler.AddMimeTypes(mimetypes.Audio, ch)

	// Start content handlers
	wg := sync.WaitGroup{}
	for i := 0; i < crawler.MaxConcurrentConnections; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for content := range ch {
				// Process received content
				fmt.Println("Received content from URL: ", content.URL.String())

				// Signal when done with content
				content.Done <- true
			}
		}()
	}

	// Start crawling and wait until done
	crawler.StartCrawling()
	wg.Wait()
}