// main builds the "crawl_baidubaike_with_extractor" engine with a console
// pipeline and the processer returned by NewMyProcesser, seeds it with the
// URLs returned by getUrlsFromFile("test.url"), and starts it.
func main() {
	// Configure output and processing first; call order matches the
	// original fluent chain.
	crawler := engine.NewEngine("crawl_baidubaike_with_extractor").
		AddPipeline(pipeline.NewConsolePipeline()).
		SetProcesser(NewMyProcesser())

	// Seed the crawl and run it.
	crawler.
		SetStartUrls(getUrlsFromFile("test.url")).
		Start()
}
// main builds the "inject_useragent" engine for a single start URL, attaches
// the processer from NewMyProcesser, the user-agent plugin and a console
// pipeline, applies a config whose headers set "User-Agent" to
// "golang spider", and starts the engine.
func main() {
	crawler := engine.NewEngine("inject_useragent").
		SetStartUrl("http://my-user-agent.com/").
		SetProcesser(NewMyProcesser()).
		AddPlugin(plugin.NewUserAgentPlugin()).
		AddPipeline(pipeline.NewConsolePipeline())

	// Headers handed to the config via SetHeaders.
	headers := map[string]string{"User-Agent": "golang spider"}
	cfg := common.NewConfig().SetHeaders(headers)

	crawler.SetConfig(cfg).Start()
}
func main() { var baseUrl = "http://www.douban.com/group/explore/" engine.NewEngine("crawl_douban_page_by_page"). SetStartUrl(baseUrl). AddPipeline(pipeline.NewConsolePipeline()). SetProcesser(NewMyProcesser(baseUrl)). Start() }
func main() { statusFile, _ := os.Create("status.url") defer statusFile.Close() engine.NewEngine("crawling_status"). SetStartUrls(getUrlsFromFile("test.url")). AddPlugin(plugin.NewStatusPlugin(statusFile)). Start() }
func main() { file, _ := os.Create("crawl_baidubaike_and_store_it_output.txt") defer file.Close() engine.NewEngine("crawl_baidubaike_and_store_it"). AddPipeline(pipeline.NewFilePipeline(file)). SetProcesser(NewMyProcesser()). SetStartUrls(getUrlsFromFile("test.url")). Start() }
func main() { config := common.NewConfig(). SetConcurrency(1000). SetWaitTime(10 * time.Millisecond). SetPollingTime(10 * time.Millisecond) engine.NewEngine("crawl_rate"). SetStartUrls(genUrls()). SetProcesser(NewMyProcesser()). SetConfig(config). Start() }
func main() { url := "http://m.baidu.com" engine.NewEngine("crawl_baidu_and_print_it").AddPipeline(pipeline.NewConsolePipeline()).SetStartUrl(url).Start() }