// main builds the "crawl_baidubaike_with_extractor" engine and starts it.
//
// A console pipeline and the processer returned by NewMyProcesser are
// attached, and the start URLs are whatever getUrlsFromFile returns for
// "test.url".
func main() {
	const (
		engineName = "crawl_baidubaike_with_extractor"
		urlFile    = "test.url"
	)

	// Configure the engine in the same call order as before, then start it
	// as a separate step.
	crawler := engine.NewEngine(engineName).
		AddPipeline(pipeline.NewConsolePipeline()).
		SetProcesser(NewMyProcesser()).
		SetStartUrls(getUrlsFromFile(urlFile))
	crawler.Start()
}
// main builds the "inject_useragent" engine and starts it against
// http://my-user-agent.com/.
//
// The engine gets the processer from NewMyProcesser, the user-agent plugin,
// a console pipeline, and a config whose headers set "User-Agent" to
// "golang spider".
func main() {
	const (
		engineName = "inject_useragent"
		startURL   = "http://my-user-agent.com/"
	)

	// Header set handed to the config below.
	headers := map[string]string{"User-Agent": "golang spider"}

	crawler := engine.NewEngine(engineName).
		SetStartUrl(startURL).
		SetProcesser(NewMyProcesser()).
		AddPlugin(plugin.NewUserAgentPlugin()).
		AddPipeline(pipeline.NewConsolePipeline()).
		SetConfig(common.NewConfig().SetHeaders(headers))
	crawler.Start()
}
// main builds the "crawl_douban_page_by_page" engine and starts it.
//
// The same base URL is used both as the engine's start URL and as the
// argument to NewMyProcesser; a console pipeline is attached.
func main() {
	startURL := "http://www.douban.com/group/explore/"

	crawler := engine.NewEngine("crawl_douban_page_by_page").
		SetStartUrl(startURL).
		AddPipeline(pipeline.NewConsolePipeline()).
		SetProcesser(NewMyProcesser(startURL))
	crawler.Start()
}
// main creates status.url, hands it to the status plugin, and starts the
// "crawling_status" engine on the URLs returned by getUrlsFromFile("test.url").
//
// If status.url cannot be created the program reports the error on stderr and
// exits with status 1 instead of passing a nil *os.File to the plugin.
func main() {
	statusFile, err := os.Create("status.url")
	if err != nil {
		// Only the os package is used for reporting so that this block needs
		// no import beyond what it already relied on.
		os.Stderr.WriteString("creating status.url: " + err.Error() + "\n")
		os.Exit(1)
	}
	defer func() {
		// A failed Close on a written file can mean lost data, so surface it.
		if cerr := statusFile.Close(); cerr != nil {
			os.Stderr.WriteString("closing status.url: " + cerr.Error() + "\n")
		}
	}()

	engine.NewEngine("crawling_status").
		SetStartUrls(getUrlsFromFile("test.url")).
		AddPlugin(plugin.NewStatusPlugin(statusFile)).
		Start()
}
// main creates crawl_baidubaike_and_store_it_output.txt, attaches a file
// pipeline that is given that file, and starts the
// "crawl_baidubaike_and_store_it" engine with the processer from
// NewMyProcesser on the URLs returned by getUrlsFromFile("test.url").
//
// If the output file cannot be created the program reports the error on
// stderr and exits with status 1 instead of passing a nil *os.File to the
// pipeline.
func main() {
	const outputPath = "crawl_baidubaike_and_store_it_output.txt"

	file, err := os.Create(outputPath)
	if err != nil {
		// Only the os package is used for reporting so that this block needs
		// no import beyond what it already relied on.
		os.Stderr.WriteString("creating " + outputPath + ": " + err.Error() + "\n")
		os.Exit(1)
	}
	defer func() {
		// A failed Close on a written file can mean lost data, so surface it.
		if cerr := file.Close(); cerr != nil {
			os.Stderr.WriteString("closing " + outputPath + ": " + cerr.Error() + "\n")
		}
	}()

	engine.NewEngine("crawl_baidubaike_and_store_it").
		AddPipeline(pipeline.NewFilePipeline(file)).
		SetProcesser(NewMyProcesser()).
		SetStartUrls(getUrlsFromFile("test.url")).
		Start()
}
// Example no. 6
// 0
// main builds the "crawl_rate" engine and starts it.
//
// The config is created first, with concurrency 1000 and both the wait time
// and the polling time set to 10ms. The engine then takes its start URLs
// from genUrls and its processer from NewMyProcesser.
func main() {
	// Shared value for the wait and polling times.
	const tick = 10 * time.Millisecond

	cfg := common.NewConfig().
		SetConcurrency(1000).
		SetWaitTime(tick).
		SetPollingTime(tick)

	crawler := engine.NewEngine("crawl_rate").
		SetStartUrls(genUrls()).
		SetProcesser(NewMyProcesser()).
		SetConfig(cfg)
	crawler.Start()
}
// main builds the "crawl_baidu_and_print_it" engine with a console pipeline
// and http://m.baidu.com as its single start URL, then starts it.
func main() {
	startURL := "http://m.baidu.com"

	crawler := engine.NewEngine("crawl_baidu_and_print_it").
		AddPipeline(pipeline.NewConsolePipeline()).
		SetStartUrl(startURL)
	crawler.Start()
}