// main configures the "crawl_baidubaike_with_extractor" engine with a console
// pipeline, the processer from NewMyProcesser, and the start URLs returned by
// getUrlsFromFile("test.url"), then calls Start on it.
func main() {
	const taskName = "crawl_baidubaike_with_extractor"

	// Builder steps are applied one per statement, in the same order a single
	// fluent chain would evaluate them.
	spider := engine.NewEngine(taskName)
	spider = spider.AddPipeline(pipeline.NewConsolePipeline())
	spider = spider.SetProcesser(NewMyProcesser())
	spider = spider.SetStartUrls(getUrlsFromFile("test.url"))
	spider.Start()
}
// main configures the "inject_useragent" engine: one start URL, the processer
// from NewMyProcesser, the user-agent plugin, a console pipeline, and a config
// whose headers carry a custom User-Agent value. It then calls Start.
func main() {
	// Request headers handed to the config via SetHeaders.
	headers := map[string]string{"User-Agent": "golang spider"}

	spider := engine.NewEngine("inject_useragent")
	spider = spider.SetStartUrl("http://my-user-agent.com/")
	spider = spider.SetProcesser(NewMyProcesser())
	spider = spider.AddPlugin(plugin.NewUserAgentPlugin())
	spider = spider.AddPipeline(pipeline.NewConsolePipeline())

	cfg := common.NewConfig().SetHeaders(headers)
	spider = spider.SetConfig(cfg)
	spider.Start()
}
// main configures the "crawl_douban_page_by_page" engine. The same URL is used
// both as the engine's start URL and as the argument to NewMyProcesser. A
// console pipeline is attached before Start is called.
func main() {
	seedURL := "http://www.douban.com/group/explore/"

	spider := engine.NewEngine("crawl_douban_page_by_page")
	spider = spider.SetStartUrl(seedURL)
	spider = spider.AddPipeline(pipeline.NewConsolePipeline())
	spider = spider.SetProcesser(NewMyProcesser(seedURL))
	spider.Start()
}
Пример #4
0
// GetEngine builds an Engine from the quick-engine configuration.
//
// The engine is created with the configured task name, a processer from
// NewQuickEngineProcesser, the configured start URLs, and the config returned
// by quickEngineConfig.ToCommonConfig.
//
// When LogFile is non-empty, that file is opened for appending (and created if
// missing), stored in this.logFile, and installed as the output of the
// standard logger.
//
// The pipeline added to the engine is chosen in this order:
//  1. a file pipeline over this.file when it is already set;
//  2. a file pipeline over a newly created OutputFile when one is configured;
//  3. a console pipeline otherwise.
//
// The process exits through log.Fatalf if the log file or the output file
// cannot be opened.
func (this *QuickEngine) GetEngine() *Engine {
	e := NewEngine(this.quickEngineConfig.TaskName).
		SetProcesser(NewQuickEngineProcesser(this.quickEngineConfig)).
		SetStartUrls(this.quickEngineConfig.StartUrls).
		SetConfig(this.quickEngineConfig.ToCommonConfig())

	if name := this.quickEngineConfig.LogFile; name != "" {
		f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
		if err != nil {
			// Report the failure before redirecting the logger; otherwise the
			// message would be written to the file that failed to open.
			log.Fatalf("cannot create log file %q: %v", name, err)
		}
		this.logFile = f
		log.SetOutput(f)
	}

	// Open the output file only if no file has been attached yet.
	if this.file == nil && this.quickEngineConfig.OutputFile != "" {
		f, err := os.Create(this.quickEngineConfig.OutputFile)
		if err != nil {
			// Fail fast rather than hand an unusable file to the pipeline.
			log.Fatalf("cannot create output file %q: %v", this.quickEngineConfig.OutputFile, err)
		}
		this.file = f
	}

	if this.file != nil {
		return e.AddPipeline(pipeline.NewFilePipeline(this.file))
	}
	return e.AddPipeline(pipeline.NewConsolePipeline())
}
// main configures the "crawl_baidu_and_print_it" engine with a console
// pipeline and a single start URL, then calls Start on it.
func main() {
	startURL := "http://m.baidu.com"

	crawler := engine.NewEngine("crawl_baidu_and_print_it")
	crawler = crawler.AddPipeline(pipeline.NewConsolePipeline())
	crawler = crawler.SetStartUrl(startURL)
	crawler.Start()
}