Example #1
func (a *print_tokens_action) Run() {
	SetupLogging(*a.verbosity)

	// Start the token writer in the background; it consumes StringChan
	// until the channel is closed.
	writer := new(filewriter.TrecFileWriter)
	writer.Init(*a.tokenOutputPath)
	go writer.WriteAllTokens()

	docStream := make(chan filereader.Document)

	// Walk docroot for files matching docpattern, feeding each document onto docStream.
	walker := new(DocWalker)
	walker.WalkDocuments(*a.docroot, *a.docpattern, docStream)

	for doc := range docStream {

		// Forward each token's text to the writer goroutine.
		for t := range doc.Tokens() {
			log.Tracef("Adding token: %s", t)
			writer.StringChan <- &t.Text
		}

		log.Debugf("Document %s (%d tokens)\n", doc.Identifier(), doc.Len())
	}

	log.Info("Done reading from the docStream")
	close(writer.StringChan)

	// Wait for the writer to finish
	writer.Wait()
}
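
Both this example and Example #2 below follow the same producer/consumer shape: start a consumer goroutine, feed it pointers over a channel, close the channel when there is nothing left to send, then wait for it to drain. The sketch below is a minimal, self-contained illustration of that contract; the stringWriter type is a hypothetical stand-in for filewriter.TrecFileWriter, whose internals are not part of these examples.

// stringWriter is a hypothetical stand-in for filewriter.TrecFileWriter,
// sketching the channel-plus-Wait contract the examples rely on.
package main

import (
	"bufio"
	"fmt"
	"os"
	"sync"
)

type stringWriter struct {
	StringChan chan *string
	file       *os.File
	out        *bufio.Writer
	wg         sync.WaitGroup
}

// Init opens the output file and prepares the channel producers send on.
func (w *stringWriter) Init(path string) {
	f, err := os.Create(path)
	if err != nil {
		panic(err)
	}
	w.file = f
	w.out = bufio.NewWriter(f)
	w.StringChan = make(chan *string)
	w.wg.Add(1) // matched by the Done in WriteAllTokens
}

// WriteAllTokens drains StringChan until it is closed, then flushes and
// closes the file. It is meant to run in its own goroutine.
func (w *stringWriter) WriteAllTokens() {
	defer w.wg.Done()
	for s := range w.StringChan {
		fmt.Fprintln(w.out, *s)
	}
	w.out.Flush()
	w.file.Close()
}

// Wait blocks until WriteAllTokens has finished.
func (w *stringWriter) Wait() { w.wg.Wait() }

func main() {
	w := new(stringWriter)
	w.Init("/tmp/tokens.txt")
	go w.WriteAllTokens()

	for _, tok := range []string{"alpha", "beta", "gamma"} {
		t := tok // copy: the pointer must stay valid after the loop advances
		w.StringChan <- &t
	}

	close(w.StringChan) // no more tokens; WriteAllTokens can finish
	w.Wait()            // block until everything is flushed to disk
}

Closing StringChan is what lets WriteAllTokens finish its range loop; Wait then guarantees the output has been flushed before the caller moves on.
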
Example #2
func (a *run_tagger_action) Run() {
	SetupLogging(*a.verbosity)

	a.loadTokens()
	log.Debug("Tokens loaded")

	log.Debug("Filling connection pool")
	a.setupConnPool()

	// Run the tagger workers in the background; they consume from taggers.Queue.
	taggers := new(tagger.Taggers)
	taggers.Init(a.connPool, a.workers)
	go taggers.Spawn()

	log.Debug("Tagging")
	// For each token, find it in the db
	for i := range a.tokens {
		taggers.Queue <- &a.tokens[i]
	}

	// No more work: close the queue and wait for the taggers to signal completion.
	close(taggers.Queue)
	<-taggers.Done

	// Write the missing tokens to disk
	fw := new(filewriter.TrecFileWriter)
	fw.Init("/tmp/missing_tokens")
	go fw.WriteAllTokens()
	for i := range taggers.MissingTokens {
		fw.StringChan <- &taggers.MissingTokens[i]
	}
	close(fw.StringChan)
	fw.Wait()

	// If not found...
}
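
The tagger half of Example #2 relies on the same contract, with a work queue that is closed once every token has been queued and a Done channel that signals when all workers have finished. Below is a minimal worker-pool sketch under that assumption; the field names Queue and Done mirror the example, but the real tagger.Taggers implementation is not shown here.

// pool is a hypothetical worker pool mirroring taggers.Queue / taggers.Done.
package main

import (
	"fmt"
	"sync"
)

type pool struct {
	Queue chan *string  // work sent by the caller
	Done  chan struct{} // closed once every worker has exited
}

// Init starts the requested number of workers, each draining Queue until it is closed.
func (p *pool) Init(workers int) {
	p.Queue = make(chan *string)
	p.Done = make(chan struct{})

	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for tok := range p.Queue {
				// Stand-in for the real lookup/tagging work.
				fmt.Println("tagging", *tok)
			}
		}()
	}

	go func() {
		wg.Wait()     // all workers have drained the queue
		close(p.Done) // signal completion to the caller
	}()
}

func main() {
	tokens := []string{"alpha", "beta", "gamma"}

	p := new(pool)
	p.Init(2)

	// Queue every token; &tokens[i] is safe because it points into the slice,
	// not at a loop variable.
	for i := range tokens {
		p.Queue <- &tokens[i]
	}

	close(p.Queue) // no more work
	<-p.Done       // block until all workers are finished
}

Closing Queue lets each worker's range loop terminate, and closing Done only after wg.Wait() ensures the caller does not go on to write missing tokens while a worker is still running.
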