Пример #1
0
// autocrawl loads the crawl configuration and kicks off crawling.
//
// crawlConfFile is parsed into websites via config.ParseConfig; if parsing
// fails the process is terminated with log.Fatalln.
//
// When needAll is true, doCrawl(conf, true) is launched in its own goroutine
// for every selected website. Independently of needAll, a cron job is
// registered with the schedule taken from config.Config["crawl_spec"]; each
// run launches the reddit and oschina project-list parsers plus
// doCrawl(conf, false) for every selected website, each in its own goroutine.
//
// whichSite, when non-empty, limits both the full crawl and the scheduled
// crawl to the website whose key equals it; an empty value selects them all.
func autocrawl(needAll bool, crawlConfFile string, whichSite string) {
	if _, err := config.ParseConfig(crawlConfFile, &websites); err != nil {
		log.Fatalln("parse crawl config error:", err)
	}

	// selected reports whether a website key passes the whichSite filter.
	selected := func(name string) bool {
		return whichSite == "" || whichSite == name
	}

	// Full crawl, only on request.
	if needAll {
		for name, conf := range websites {
			if !selected(name) {
				continue
			}

			logger.Infoln("all crawl", name)
			go doCrawl(conf, true)
		}
	}

	// Scheduled incremental crawl.
	// NOTE(review): whatever AddFunc returns (possibly a schedule-spec parse
	// error, depending on the cron library version) is discarded here — confirm
	// whether it should be checked.
	scheduler := cron.New()
	scheduler.AddFunc(config.Config["crawl_spec"], func() {
		// Crawl reddit.
		go service.ParseReddit("")

		// Crawl www.oschina.net/project.
		go service.ParseProjectList("http://www.oschina.net/project/lang/358/go?tag=0&os=0&sort=time")

		for name, conf := range websites {
			if !selected(name) {
				continue
			}

			logger.Infoln("do crawl", name)
			go doCrawl(conf, false)
		}
	})
	scheduler.Start()
}
Пример #2
0
// TestParseProjectList invokes service.ParseProjectList on the oschina Go
// project listing URL (sort=view). It makes no assertions about the outcome,
// so it only fails if the call itself panics.
func TestParseProjectList(t *testing.T) {
	const projectListURL = "http://www.oschina.net/project/lang/358/go?tag=0&os=0&sort=view"

	service.ParseProjectList(projectListURL)
}