func (chanman *myChannelManager) Init(channelArgs base.ChannelArgs, reset bool) bool { if err := channelArgs.Check(); err != nil { panic(err) } chanman.rwmutex.Lock() defer chanman.rwmutex.Unlock() if chanman.status == CHANNEL_MANAGER_STATUS_INITIALIZED && !reset { return false } chanman.channelArgs = channelArgs chanman.reqCh = make(chan base.Request, channelArgs.ReqChanLen()) chanman.respCh = make(chan base.Response, channelArgs.RespChanLen()) chanman.itemCh = make(chan base.Item, channelArgs.ItemChanLen()) chanman.errorCh = make(chan error, channelArgs.ErrorChanLen()) chanman.status = CHANNEL_MANAGER_STATUS_INITIALIZED return true }
func (sched *myScheduler) Start( channelArgs base.ChannelArgs, poolBaseArgs base.PoolBaseArgs, crawlDepth uint32, httpClientGenerator GenHttpClient, respParsers []anlz.ParseResponse, itemProcessors []ipl.ProcessItem, firstHttpReq *http.Request) (err error) { defer func() { if p := recover(); p != nil { errMsg := fmt.Sprintf("Fatal Scheduler Error: %s\n", p) log.Error(errMsg) err = errors.New(errMsg) } }() if atomic.LoadUint32(&sched.running) == 1 { return errors.New("The scheduler has been started!\n") } atomic.StoreUint32(&sched.running, 1) if err := channelArgs.Check(); err != nil { return err } sched.channelArgs = channelArgs if err := poolBaseArgs.Check(); err != nil { return err } sched.poolBaseArgs = poolBaseArgs sched.crawlDepth = crawlDepth sched.chanman = generateChannelManager(sched.channelArgs) if httpClientGenerator == nil { return errors.New("The HTTP client generator list is invalid!") } dlpool, err := generatePageDownloaderPool( sched.poolBaseArgs.PageDownloaderPoolSize(), httpClientGenerator) if err != nil { errMsg := fmt.Sprintf("Occur error when get page downloader pool: %s\n", err) return errors.New(errMsg) } sched.dlpool = dlpool analyzerPool, err := generateAnalyzerPool(sched.poolBaseArgs.AnalyzerPoolSize()) if err != nil { if err != nil { errMsg := fmt.Sprintf("Occur error when get analyzer pool: %s\n", err) return errors.New(errMsg) } } sched.analyzerPool = analyzerPool if itemProcessors == nil { return errors.New("The item processor list is invalid!") } for i, ip := range itemProcessors { if ip == nil { return errors.New(fmt.Sprintf("The %dth item processor is invalid!", i)) } } sched.itemPipeline = generateItemPipeline(itemProcessors) if sched.stopSign == nil { sched.stopSign = mdw.NewStopSign() } else { sched.stopSign.Reset() } sched.reqCache = newRequestCache() sched.urlMap = make(map[string]bool) sched.startDownloading() sched.activateAnalyzers(respParsers) sched.openItemPipeline() sched.schedule(10 * time.Millisecond) if firstHttpReq == nil { return errors.New("The first HTTP request is invalid!") } pd, err := getPrimaryDomain(firstHttpReq.Host) if err != nil { return err } sched.primaryDomain = pd firstReq := base.NewRequest(firstHttpReq, 0) sched.reqCache.put(firstReq) return nil }