Example #1
0
// main calls env.Load with "conf" and "scheduler.conf", builds a scheduler via
// scheduler.NewScheduler and then hands control to its Work method.
// Either start-up failure aborts the program with a panic.
func main() {
	loadErr := env.Load("conf", "scheduler.conf")
	if loadErr != nil {
		panic(loadErr.Error())
	}
	// Deferred only after a successful Load, so it runs when main unwinds.
	defer env.Close()

	worker, initErr := scheduler.NewScheduler()
	if initErr != nil {
		msg := "init scheduler error: " + initErr.Error()
		panic(msg)
	}
	worker.Work()
}
// main starts the AccipitrSpiderEngine process. In order it:
//
//  1. parses the -conf flag (default "config.ini"),
//  2. creates the logger and loads the configuration,
//  3. opens the KV database and builds the HTTP router around it,
//  4. registers the site plugins,
//  5. starts the scheduler and the dispatcher,
//  6. submits every seed URL once and serves the router on :9999,
//  7. re-submits the seed URLs once per configured interval, forever.
//
// Start-up errors are printed to stdout and make main return early.
func main() {
	const listenAddr = ":9999"

	// Read the start-up parameters.
	var configFile string
	flag.StringVar(&configFile, "conf", "config.ini", "configure file full path")
	flag.Parse()

	fmt.Printf("Start AccipitrSpiderEngine...\n")

	// Start the logging system.
	logger, err := log4jzl.New("AccipitrSpiderEngine")
	if err != nil {
		fmt.Printf("[ERROR] Create logger Error: %v\n", err)
		// NOTE(review): start-up continues on purpose (the early return was
		// disabled by the author); confirm logger is usable after a failure.
	}

	// Read the configuration file.
	configure, err := u.NewConfigure(configFile)
	if err != nil {
		fmt.Printf("[ERROR] Parse Configure File Error: %v\n", err)
		return
	}

	// Validate the re-seed interval before anything is started: a
	// non-positive period would make the loop at the end of main spin without
	// pausing and flood the scheduler with seed URLs.
	// NOTE(review): the second return value of GetInterval is still discarded;
	// its type is not visible here — check it if it is an error.
	interval, _ := configure.GetInterval()
	period := time.Second * time.Duration(interval)
	if period <= 0 {
		fmt.Printf("[ERROR] Invalid crawl interval: %v\n", interval)
		return
	}

	// Database adaptor initialisation (currently disabled).
	/*
		dbAdaptor, err := u.NewDBAdaptor(configure, logger)
		if err != nil {
			fmt.Printf("[ERROR] Create DB Adaptor Error: %v\n", err)
			return
		}
		defer dbAdaptor.Release()
	*/

	// Initialise the KV database: use NewPandionKVWithFile when the index file
	// is already present under ./DB, otherwise NewPandionKV.
	// NOTE(review): the second return value of GetKVDB is still discarded.
	dbName, _ := configure.GetKVDB()
	var kvdb *u.PandionKV
	if Exist(fmt.Sprintf("./DB/%v.idx", dbName)) {
		kvdb = u.NewPandionKVWithFile(dbName, logger)
	} else {
		kvdb = u.NewPandionKV(dbName, logger)
	}

	router := &Router{Kvdb: kvdb}

	// Register the custom plugins, keyed by handler name.
	// NOTE(review): the jblpro plugin is constructed with the name "jblpor"
	// while every other plugin name equals its map key — likely a typo, left
	// unchanged because the use of the name is not visible here.
	ps := map[string]u.PlugInterface{
		"yingxiaoqun":  Plugs.NewYXQ("yingxiaoqun", logger, nil), // dbAdaptor)
		"radialeng":    Plugs.NewRadialeng("radialeng", logger, nil),
		"maudio":       Plugs.NewMAudio("maudio", logger, nil),
		"jblpro":       Plugs.NewJblpro("jblpor", logger, nil),
		"ikmultimedia": Plugs.NewIkmultimedia("ikmultimedia", logger, nil),
		"avid":         Plugs.NewAvid("avid", logger, nil),
		"shure":        Plugs.NewShure("shure", logger, nil),
	}

	// Start the scheduler.
	// NOTE(review): the second return value of ConfigScheduler is still
	// discarded.
	scheduler := Scheduler.NewScheduler(9, logger, kvdb)
	outChan, _ := scheduler.ConfigScheduler()
	scheduler.StartScheduler()

	// Start the dispatcher.
	dispatcher := Dispatcher.NewDispatcher(outChan, logger)
	dispatcher.ConfigureDispatcher(9, ps, scheduler)
	dispatcher.StartDispatcher()

	// Read the seed URLs.
	urls, err := ReadSeedUrls()
	if err != nil {
		fmt.Printf("[ERROR] ReadSeedUrls Error: %v\n", err)
		return
	}

	// sendSeeds submits every seed URL to the scheduler, tagged with the
	// handler name it is listed under.
	sendSeeds := func() {
		for handler, seeds := range urls {
			for _, seedURL := range seeds {
				scheduler.SendUrl(u.CrawlData{Url: seedURL, Type: u.SEED_URL, HandlerName: handler})
			}
		}
	}
	sendSeeds()

	// Serve the router in the background. ListenAndServe only returns on
	// failure, so report the error instead of dropping it silently.
	go func() {
		if serveErr := http.ListenAndServe(listenAddr, router); serveErr != nil {
			fmt.Printf("[ERROR] Server start fail: %v\n", serveErr)
		}
	}()
	fmt.Printf("START SERVER : %v\n", listenAddr)

	// Re-submit the seeds after every pause. The loop never exits, so main
	// blocks here for the lifetime of the process.
	for {
		time.Sleep(period)
		sendSeeds()
	}
}