// main starts an engine named "crawling_status" whose start URLs come from
// getUrlsFromFile("test.url") and whose status plugin is given status.url.
func main() {
	// os.Create truncates an existing status.url. Stop on failure instead of
	// handing a nil *os.File to the status plugin and running without it.
	statusFile, err := os.Create("status.url")
	if err != nil {
		// The *os.PathError text already names the operation and the path.
		os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(1)
	}
	defer statusFile.Close()

	engine.NewEngine("crawling_status").
		SetStartUrls(getUrlsFromFile("test.url")).
		AddPlugin(plugin.NewStatusPlugin(statusFile)).
		Start()
}
示例#2
0
// main expects five positional arguments, in this order:
//
//	config file, input file, output file, status file, log file
//
// It builds one "http://bgp.he.net/dns/<domain>" start URL per domain returned
// by LoadAndDiff and runs a quick engine over them with the proxy, cookie and
// status plugins attached.
func main() {
	// os.Args[0] is the program name, so five arguments need len(os.Args) >= 6.
	// Indices 1 through 5 are read below.
	if len(os.Args) < 6 {
		log.Printf("lost argument")
		return
	}

	configFilePath, inFilePath, outFilePath, statusFilePath, logFilePath :=
		os.Args[1], os.Args[2], os.Args[3], os.Args[4], os.Args[5]

	// NOTE(review): presumably LoadAndDiff returns the input domains that are
	// not yet recorded in the status file; confirm against its definition.
	domains := LoadAndDiff(inFilePath, statusFilePath)
	log.Printf("load %d urls from %s", len(domains), inFilePath)

	// The files are opened for appending without O_CREATE, so each one must
	// already exist. Errors are reported and main returns, which lets the
	// deferred Close calls for already-opened files run.
	outFile, err := os.OpenFile(outFilePath, os.O_RDWR|os.O_APPEND, 0660)
	if err != nil {
		log.Printf("output file: %v", err)
		return
	}
	defer outFile.Close()

	statusFile, err := os.OpenFile(statusFilePath, os.O_RDWR|os.O_APPEND, 0660)
	if err != nil {
		log.Printf("status file: %v", err)
		return
	}
	defer statusFile.Close()

	logFile, err := os.OpenFile(logFilePath, os.O_RDWR|os.O_APPEND, 0660)
	if err != nil {
		log.Printf("log file: %v", err)
		return
	}
	defer logFile.Close()

	// From here on, log lines go to both the log file and stdout.
	log.SetOutput(io.MultiWriter(logFile, os.Stdout))
	log.SetFlags(log.LstdFlags | log.Lshortfile)

	urls := make([]string, 0, len(domains))
	for _, domain := range domains {
		urls = append(urls, "http://bgp.he.net/dns/"+domain)
	}

	e := engine.
		NewQuickEngine(configFilePath).
		SetOutputFile(outFile).
		GetEngine()
	// Store the engine's config in the package-level gConfig before starting.
	gConfig = e.GetConfig()
	e.SetStartUrls(urls).
		AddPlugin(plugin.NewProxyPlugin()).
		AddPlugin(plugin.NewCookiePlugin(GetCookieFunc)).
		AddPlugin(plugin.NewStatusPlugin(statusFile))
	e.Start()
}