Example #1
0
// crawlPackages runs a map-only job whose input is kv.DirInput(fpToCrawlPkg),
// whose mappers are PackageCrawler values sharing httpClient, and whose
// output is kv.DirOutput(fpOutNewDocs). The result of job.Run (nil on
// success) is sent to end.
//
// A watchdog timer sends a timeout error to end if the job has not finished
// within configs.CrawlerDuePerRun plus ten minutes. The watchdog does not
// interrupt the job: if it fires, the job keeps running and its own result is
// still sent to end afterwards, so end may receive two values in that case.
func crawlPackages(httpClient doc.HttpClient, fpToCrawlPkg,
	fpOutNewDocs sophie.FsPath, end chan error) {

	watchdog := time.AfterFunc(configs.CrawlerDuePerRun+time.Minute*10, func() {
		end <- errorsp.NewWithStacks("Crawling packages timeout!")
	})

	outNewDocs := kv.DirOutput(fpOutNewDocs)
	// NOTE(review): presumably clears previous output before the run; any
	// result returned by Clean is not checked here — confirm this is intended.
	outNewDocs.Clean()

	job := mr.MapOnlyJob{
		Source: []mr.Input{
			kv.DirInput(fpToCrawlPkg),
		},
		NewMapperF: func(src, part int) mr.OnlyMapper {
			return &PackageCrawler{
				part:       part,
				httpClient: httpClient,
			}
		},
		Dest: []mr.Output{
			outNewDocs,
		},
	}

	err := job.Run()
	if err != nil {
		log.Printf("crawlPackages: job.Run failed: %v", err)
	}

	// The job has finished, so disarm the watchdog before reporting. Without
	// this the timer would still fire later and push a spurious timeout error
	// into end (or block forever in its goroutine if nobody receives).
	watchdog.Stop()
	end <- err
}
Example #2
0
// crawlPersons runs a map-only job whose input is kv.DirInput(fpToCrawlPsn)
// and whose mappers are PersonCrawler values sharing httpClient. The job has
// no Dest configured. The result of job.Run (nil on success) is sent to end.
//
// A watchdog timer sends a timeout error to end if the job has not finished
// within gcse.CrawlerDuePerRun plus ten minutes. The watchdog does not
// interrupt the job: if it fires, the job keeps running and its own result is
// still sent to end afterwards, so end may receive two values in that case.
func crawlPersons(httpClient doc.HttpClient, fpToCrawlPsn sophie.FsPath, end chan error) {
	watchdog := time.AfterFunc(gcse.CrawlerDuePerRun+time.Minute*10, func() {
		end <- errors.New("Crawling persons timeout!")
	})

	job := mr.MapOnlyJob{
		Source: []mr.Input{
			kv.DirInput(fpToCrawlPsn),
		},

		NewMapperF: func(src, part int) mr.OnlyMapper {
			return &PersonCrawler{
				part:       part,
				httpClient: httpClient,
			}
		},
	}

	err := job.Run()
	if err != nil {
		log.Printf("crawlPersons: job.Run failed: %v", err)
	}

	// The job has finished, so disarm the watchdog before reporting. Without
	// this the timer would still fire later and push a spurious timeout error
	// into end (or block forever in its goroutine if nobody receives).
	watchdog.Stop()
	end <- err
}