Esempio n. 1
0
// TestNewSpider is the unit test for NewSpider(). It loads the spider
// configuration from ../../conf/spider.conf and checks that NewSpider returns
// a non-nil value for a single seed URL.
func TestNewSpider(t *testing.T) {
	confPath := "../../conf"
	confFile := confPath + "/spider.conf"
	seedUrls := []string{"http://www.baidu.com"}

	// Stop right away when the config cannot be loaded: handing an unusable
	// config to NewSpider would hide the real cause of the failure.
	cfg, err := conf.InitConf(confFile)
	if err != nil {
		t.Fatalf("conf.InitConf(%q) failed: %v", confFile, err)
	}

	if s := NewSpider(seedUrls, cfg, confPath); s == nil {
		t.Fatal("spider.NewSpider failes")
	}
	t.Log("spider.NewSpider passed.")
}
Esempio n. 2
0
// main is the entry point of the crawler. It loads the log4go configuration,
// parses the command-line flags, reads the spider config and the JSON seed
// URL list from the config directory, then starts the spider and waits for it.
func main() {
	// Load the l4g (log4go) configuration file.
	l4g.LoadConfiguration(SPIDER_LOGCONF_XML)

	// Command-line flags.
	// refer : http://www.01happy.com/golang-command-line-arguments/
	var (
		confDir string
		logDir  string // bound to -l; not read anywhere else in this function
		showVer bool
	)
	flag.StringVar(&confDir, "c", "../../conf", "config file path")
	flag.StringVar(&logDir, "l", "../../log", "log file path")
	flag.BoolVar(&showVer, "v", false, "print version")
	flag.Parse()

	// -v only prints the version and exits successfully.
	if showVer {
		utils.PrintVersion()
		os.Exit(0)
	}

	l4g.Info("Hi, dash's %s is running...\n", "go_mini_spider")

	// NOTE(review): SlowExit is defined elsewhere; presumably it terminates
	// the process after the error has been logged — confirm.
	cfg, err := conf.InitConf(confDir + "/" + SPIDER_CONFIG_FILE)
	if err != nil {
		l4g.Error("read spider config failed, err [%s]", err)
		SlowExit()
	}

	// The seed URL list file is resolved relative to the config directory.
	raw, err := ioutil.ReadFile(confDir + "/" + cfg.UrlListFile)
	if err != nil {
		l4g.Error("readfile err[%s]", err)
		SlowExit()
	}

	// Decode the JSON content into a []string of seed URLs.
	var seedUrls []string
	err = json.Unmarshal(raw, &seedUrls)
	if err != nil {
		l4g.Error("parse json err[%s]", err)
		SlowExit()
	}

	// Set GOMAXPROCS to the number of CPUs.
	runtime.GOMAXPROCS(runtime.NumCPU())

	// Start the crawler and wait for its tasks to complete.
	sp := spider.NewSpider(seedUrls, cfg, confDir)
	sp.Start()
	sp.Wait()

	// NOTE(review): the one-second pause before returning presumably lets the
	// logger flush pending output — confirm.
	time.Sleep(1 * time.Second)
}