// main starts the AccipitrSpiderEngine crawler.
//
// In order it: parses the -conf flag, creates the logger, loads the
// configuration file, builds the KV store, registers the site plugins, starts
// the scheduler and the dispatcher, sends every seed URL to the scheduler
// once, starts the HTTP router on :9999 in a background goroutine, and then
// re-sends the seed URLs after every configured interval.
//
// The re-send loop never terminates, so main only returns when configuration
// parsing or seed-URL loading fails.
func main() {
	// Read the startup flags.
	var configFile string
	flag.StringVar(&configFile, "conf", "config.ini", "configure file full path")
	flag.Parse()

	fmt.Printf("Start AccipitrSpiderEngine...\n")

	// Start the logging system.
	// NOTE(review): a logger creation error is reported but start-up continues
	// with whatever logger value was returned — confirm that is intended.
	logger, err := log4jzl.New("AccipitrSpiderEngine")
	if err != nil {
		fmt.Printf("[ERROR] Create logger Error: %v\n", err)
		//return
	}

	// Read the configuration file.
	configure, err := u.NewConfigure(configFile)
	if err != nil {
		fmt.Printf("[ERROR] Parse Configure File Error: %v\n", err)
		return
	}

	// Initialise the database adaptor (currently disabled; plugins receive nil
	// in its place).
	/*
		dbAdaptor, err := u.NewDBAdaptor(configure, logger)
		if err != nil {
			fmt.Printf("[ERROR] Create DB Adaptor Error: %v\n", err)
			return
		}
		defer dbAdaptor.Release()
	*/

	// Initialise the KV database. NewPandionKVWithFile is used when the index
	// file ./DB/<name>.idx already exists, NewPandionKV otherwise.
	// NOTE(review): the second return value of GetKVDB is discarded —
	// presumably the configuration falls back to a default; confirm.
	dbName, _ := configure.GetKVDB()
	var kvdb *u.PandionKV
	if Exist(fmt.Sprintf("./DB/%v.idx", dbName)) {
		kvdb = u.NewPandionKVWithFile(dbName, logger)
	} else {
		kvdb = u.NewPandionKV(dbName, logger)
	}

	router := &Router{Kvdb: kvdb}

	// Register the custom plugins, keyed by handler name. Every plugin is
	// constructed with the same name it is registered under; the jblpro plugin
	// was previously constructed as "jblpor", which did not match its key.
	ps := map[string]u.PlugInterface{
		"yingxiaoqun":  Plugs.NewYXQ("yingxiaoqun", logger, nil), // dbAdaptor)
		"radialeng":    Plugs.NewRadialeng("radialeng", logger, nil),
		"maudio":       Plugs.NewMAudio("maudio", logger, nil),
		"jblpro":       Plugs.NewJblpro("jblpro", logger, nil),
		"ikmultimedia": Plugs.NewIkmultimedia("ikmultimedia", logger, nil),
		"avid":         Plugs.NewAvid("avid", logger, nil),
		"shure":        Plugs.NewShure("shure", logger, nil),
	}

	// Start the scheduler.
	scheduler := Scheduler.NewScheduler(9, logger, kvdb)
	outChan, _ := scheduler.ConfigScheduler()
	scheduler.StartScheduler()

	// Start the dispatcher, fed by the scheduler's output channel.
	dispatcher := Dispatcher.NewDispatcher(outChan, logger)
	dispatcher.ConfigureDispatcher(9, ps, scheduler)
	dispatcher.StartDispatcher()

	// Read the seed URLs, grouped by the handler name that processes them.
	urls, err := ReadSeedUrls()
	if err != nil {
		fmt.Printf("[ERROR] ReadSeedUrls Error: %v\n", err)
		return
	}

	// sendSeeds hands every seed URL to the scheduler, tagged with its handler.
	sendSeeds := func() {
		for handler, seeds := range urls {
			for _, seed := range seeds {
				scheduler.SendUrl(u.CrawlData{Url: seed, Type: u.SEED_URL, HandlerName: handler})
			}
		}
	}
	sendSeeds()

	// Serve the router in the background. ListenAndServe only returns on
	// failure, so report the error instead of dropping it silently.
	const addr = ":9999"
	go func() {
		if err := http.ListenAndServe(addr, router); err != nil {
			fmt.Printf("[ERROR] Server start fail: %v\n", err)
		}
	}()
	fmt.Printf("START SERVER : %v\n", addr)

	// Re-send the seed URLs after every interval, forever.
	// NOTE(review): the second return value of GetInterval is discarded, and a
	// non-positive interval makes this loop spin without pausing — confirm the
	// configuration always yields a positive value.
	interval, _ := configure.GetInterval()
	pause := time.Second * time.Duration(interval)
	for {
		time.Sleep(pause)
		sendSeeds()
	}
}
Example #2
0
// main starts FalconEngine in the mode selected by the -mode flag.
//
//   - "search": builds the index set from the configured table fields, starts
//     the incremental updater and builder, and serves the "search" and
//     "update" processors over HTTP on the configured port.
//   - "build": runs a full index build through the builder engine.
//   - anything else: logs an error.
//
// Flags: -conf (configuration file path), -mode (search | build) and -core
// (value passed to runtime.GOMAXPROCS).
//
// The process exits with status 1 only when the HTTP server fails; every other
// path, including start-up errors, returns normally.
func main() {
	// exitCode is applied by this first-registered (and therefore last-run)
	// deferred function, so the Release calls deferred below still run before
	// the process exits. Calling os.Exit directly would skip them.
	exitCode := 0
	defer func() {
		if exitCode != 0 {
			os.Exit(exitCode)
		}
	}()

	fmt.Printf("init FalconEngine.....\n")

	// Read the startup flags.
	var (
		configFile string
		mode       string
		cores      int
	)
	flag.StringVar(&configFile, "conf", "search.conf", "configure file full path")
	flag.StringVar(&mode, "mode", "search", "start mode[ search | build ]")
	flag.IntVar(&cores, "core", 4, "cpu cores")
	flag.Parse()

	runtime.GOMAXPROCS(cores)

	// Read the configuration file.
	configure, err := BaseFunctions.NewConfigure(configFile)
	if err != nil {
		fmt.Printf("[ERROR] Parse Configure File Error: %v\n", err)
		return
	}

	// Start the logging system.
	// NOTE(review): a logger creation error is reported but start-up continues
	// and the logger is used below — confirm that is intended.
	logger, err := log4jzl.New("FalconEngine")
	if err != nil {
		fmt.Printf("[ERROR] Create logger Error: %v\n", err)
		//return
	}

	// Initialise the database adaptor.
	dbAdaptor, err := BaseFunctions.NewDBAdaptor(configure, logger)
	if err != nil {
		fmt.Printf("[ERROR] Create DB Adaptor Error: %v\n", err)
		return
	}
	defer dbAdaptor.Release()

	// Initialise the local redis client.
	redisClient, err := BaseFunctions.NewRedisClient(configure, logger)
	if err != nil {
		fmt.Printf("[ERROR] Create redisClient Error: %v\n", err)
		return
	}
	defer redisClient.Release()

	switch mode {
	case "search":
		processor := &BaseFunctions.BaseProcessor{configure, logger, dbAdaptor, redisClient}
		bitmap := utils.NewBitmap()
		fields, err := configure.GetTableFields()
		if err != nil {
			logger.Error("%v", err)
			return
		}
		indexSet := indexer.NewIndexSet(bitmap, logger)
		indexSet.InitIndexSet(fields)

		searcher := NewSearcher(processor, indexSet) // &Searcher{processor}
		// dataChan is shared by the updater and the incremental builder.
		dataChan := make(chan builder.UpdateInfo, 1000)
		updater := NewUpdater(processor, indexSet, dataChan)
		updater.IncUpdating()
		router := &BaseFunctions.Router{configure, logger, map[string]BaseFunctions.FEProcessor{
			"search": searcher,
			"update": updater,
		}}

		// Named incBuilder rather than builder so the imported builder package
		// is not shadowed.
		incBuilder := NewBuilderEngine(configure, dbAdaptor, logger, redisClient, indexSet)
		incBuilder.StartIncUpdate(dataChan)

		logger.Info("Server Start...")
		// NOTE(review): the second return value of GetPort is discarded —
		// presumably the configuration falls back to a default; confirm.
		port, _ := configure.GetPort()
		addr := fmt.Sprintf(":%d", port)
		if err := http.ListenAndServe(addr, router); err != nil {
			logger.Error("Server start fail: %v", err)
			exitCode = 1
			return
		}

	case "build":
		fullBuilder := NewBuilderEngine(configure, dbAdaptor, logger, redisClient, nil)
		fullBuilder.BuidingAllIndex()

	default:
		logger.Error("Wrong start mode...only support [ search | build ]")
	}
}