func autocrawl(needAll bool, crawlConfFile string, whichSite string) { _, err := config.ParseConfig(crawlConfFile, &websites) if err != nil { log.Fatalln("parse crawl config error:", err) } if needAll { // 全量 for website, wbconf := range websites { if whichSite != "" && whichSite != website { continue } logger.Infoln("all crawl", website) go doCrawl(wbconf, true) } } // 定时增量 c := cron.New() c.AddFunc(config.Config["crawl_spec"], func() { // 抓取 reddit go service.ParseReddit("") // 抓取 www.oschina.net/project go service.ParseProjectList("http://www.oschina.net/project/lang/358/go?tag=0&os=0&sort=time") for website, wbconf := range websites { if whichSite != "" && whichSite != website { continue } logger.Infoln("do crawl", website) go doCrawl(wbconf, false) } }) c.Start() }
// TestParseProjectList calls service.ParseProjectList on the oschina Go
// project listing URL with sort=view. It makes no assertions of its own, so
// it never reports a failure through t.
func TestParseProjectList(t *testing.T) {
	const projectListURL = "http://www.oschina.net/project/lang/358/go?tag=0&os=0&sort=view"

	service.ParseProjectList(projectListURL)
}