Beispiel #1
0
// RegisterDownload installs the given downloaders on the spider.
//
// It replaces s.rawChan with a new buffered channel whose capacity equals
// len(downloaders), hands that channel to every downloader through
// SetCallBack, and then stores a pool built from the same slice in
// s.downloadPool. The receiver is returned so calls can be chained.
func (s *Spider) RegisterDownload(downloaders []downloader.Downloader) *Spider {
	// One buffer slot per registered downloader.
	results := make(chan *raw.Raw, len(downloaders))
	s.rawChan = results

	for _, dl := range downloaders {
		dl.SetCallBack(results)
	}

	pool := downloader.NewPool(downloaders)
	s.downloadPool = pool

	return s
}
Beispiel #2
0
// register fills in every component that has not been registered yet and
// then kicks off the spider's stages.
//
// Steps, in order:
//   - if s.pagePool is nil, build a pool of s.threadNum page processors;
//   - if s.downloadPool is nil, build a pool of s.threadNum downloaders
//     named "down <index>", each given s.rawChan as its callback channel;
//   - call s.download(), s.page() and s.analyse();
//   - if s._scheduler is nil, create one with scheduler.New();
//   - set the scheduler's max depth to s.depth and add every request in s.reqs;
//   - call s.Watch() when s.OnWatch is set;
//   - sleep for one second before returning.
func (s *Spider) register() {
	// Default page processors.
	if s.pagePool == nil {
		var processors []page.PageProcessor
		for remaining := s.threadNum; remaining > 0; remaining-- {
			processors = append(processors, page.NewPageProcessor())
		}
		s.pagePool = page.NewPool(processors)
	}

	// Default downloaders.
	// NOTE(review): within this view s.rawChan is only assigned by
	// RegisterDownload; confirm it is initialised elsewhere before this path
	// runs, otherwise these downloaders receive a nil channel.
	if s.downloadPool == nil {
		var workers []downloader.Downloader
		for i := uint(0); i < s.threadNum; i++ {
			name := fmt.Sprintf("down %d", i)
			dl := downloader.New(name)
			dl.SetCallBack(s.rawChan)
			workers = append(workers, dl)
		}
		s.downloadPool = downloader.NewPool(workers)
	}

	// The stages are invoked before the scheduler is seeded below.
	s.download()
	s.page()
	s.analyse()

	if s._scheduler == nil {
		s._scheduler = scheduler.New()
	}
	s._scheduler.SetMaxDepth(s.depth)
	for _, request := range s.reqs {
		s._scheduler.Add(request)
	}

	if s.OnWatch {
		s.Watch()
	}

	// NOTE(review): fixed one-second pause; presumably gives the stages time
	// to start — confirm whether explicit synchronisation is intended.
	time.Sleep(time.Second)
}