// NewSpider()方法单元测试 func TestNewSpider(t *testing.T) { confPath := "../../conf" confFile := confPath + "/spider.conf" seedUrls := []string{"http://www.baidu.com"} conf, _ := conf.InitConf(confFile) if s := NewSpider(seedUrls, conf, confPath); s == nil { t.Error("spider.NewSpider failes") } else { t.Log("spider.NewSpider passed.") } }
// 爬虫主程序 func main() { // l4g的配置文件 l4g.LoadConfiguration(SPIDER_LOGCONF_XML) // refer : http://www.01happy.com/golang-command-line-arguments/ var confPath string var logPath string var printVer bool flag.StringVar(&confPath, "c", "../../conf", "config file path") flag.StringVar(&logPath, "l", "../../log", "log file path") flag.BoolVar(&printVer, "v", false, "print version") flag.Parse() if printVer { utils.PrintVersion() os.Exit(0) } l4g.Info("Hi, dash's %s is running...\n", "go_mini_spider") confFile := confPath + "/" + SPIDER_CONFIG_FILE conf, err := conf.InitConf(confFile) if err != nil { l4g.Error("read spider config failed, err [%s]", err) SlowExit() } var seedUrls []string // read and parse json,相对路径 b, err := ioutil.ReadFile(confPath + "/" + conf.UrlListFile) if err != nil { l4g.Error("readfile err[%s]", err) SlowExit() } //json to []string if err := json.Unmarshal(b, &seedUrls); err != nil { l4g.Error("parse json err[%s]", err) SlowExit() } //GOMAXPROCS设置 runtime.GOMAXPROCS(runtime.NumCPU()) // 启动爬虫 spider := spider.NewSpider(seedUrls, conf, confPath) spider.Start() // 等待任务完成 spider.Wait() time.Sleep(1 * time.Second) }