// main wires up the "crawl_baidubaike_with_extractor" engine: results go to a
// console pipeline, pages are handled by NewMyProcesser, and the start URLs
// are whatever getUrlsFromFile returns for "test.url".
func main() {
	// Build the engine and attach the output pipeline first, matching the
	// order in which the builder methods are expected to be invoked.
	spider := engine.NewEngine("crawl_baidubaike_with_extractor").
		AddPipeline(pipeline.NewConsolePipeline())

	spider.SetProcesser(NewMyProcesser()).
		SetStartUrls(getUrlsFromFile("test.url")).
		Start()
}
// main runs the "inject_useragent" engine against http://my-user-agent.com/
// with the user-agent plugin registered, a console pipeline for output, and a
// config whose headers set User-Agent to "golang spider".
func main() {
	// Headers handed to the engine config.
	headers := map[string]string{"User-Agent": "golang spider"}

	spider := engine.NewEngine("inject_useragent").
		SetStartUrl("http://my-user-agent.com/").
		SetProcesser(NewMyProcesser()).
		AddPlugin(plugin.NewUserAgentPlugin()).
		AddPipeline(pipeline.NewConsolePipeline())

	conf := common.NewConfig().SetHeaders(headers)
	spider.SetConfig(conf).Start()
}
func main() { var baseUrl = "http://www.douban.com/group/explore/" engine.NewEngine("crawl_douban_page_by_page"). SetStartUrl(baseUrl). AddPipeline(pipeline.NewConsolePipeline()). SetProcesser(NewMyProcesser(baseUrl)). Start() }
func (this *QuickEngine) GetEngine() *Engine { e := NewEngine(this.quickEngineConfig.TaskName). SetProcesser(NewQuickEngineProcesser(this.quickEngineConfig)). SetStartUrls(this.quickEngineConfig.StartUrls). SetConfig(this.quickEngineConfig.ToCommonConfig()) if this.quickEngineConfig.LogFile != "" { var err error this.logFile, err = os.OpenFile(this.quickEngineConfig.LogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) log.SetOutput(this.logFile) if err != nil { log.Fatal("cannot create log file: %s\n", this.quickEngineConfig.LogFile) } } if this.file != nil { return e.AddPipeline(pipeline.NewFilePipeline(this.file)) } else if this.quickEngineConfig.OutputFile != "" { this.file, _ = os.Create(this.quickEngineConfig.OutputFile) return e.AddPipeline(pipeline.NewFilePipeline(this.file)) } return e.AddPipeline(pipeline.NewConsolePipeline()) }
func main() { url := "http://m.baidu.com" engine.NewEngine("crawl_baidu_and_print_it").AddPipeline(pipeline.NewConsolePipeline()).SetStartUrl(url).Start() }