// Run walks every document under docroot matching docpattern and streams
// each token to a TrecFileWriter writing at tokenOutputPath.
func (a *print_tokens_action) Run() {
	SetupLogging(*a.verbosity)

	writer := new(filewriter.TrecFileWriter)
	writer.Init(*a.tokenOutputPath)
	go writer.WriteAllTokens()

	docStream := make(chan filereader.Document)

	// WalkDocuments is expected to feed docStream from its own goroutines
	// and close it when the walk completes; otherwise the range below
	// would never terminate.
	walker := new(DocWalker)
	walker.WalkDocuments(*a.docroot, *a.docpattern, docStream)

	for doc := range docStream {
		for t := range doc.Tokens() {
			log.Tracef("Adding token: %s", t)
			writer.StringChan <- &t.Text
		}
		log.Debugf("Document %s (%d tokens)", doc.Identifier(), doc.Len())
	}
	log.Info("Done reading from the docStream")

	// Signal end of input, then wait for the writer to flush and finish.
	close(writer.StringChan)
	writer.Wait()
}
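// For context, a minimal sketch of the writer contract Run relies on,
// assuming WriteAllTokens drains StringChan until the channel is closed
// and Wait blocks until that loop has flushed and returned. The names
// mirror the calls above, but the bodies are illustrative, not the
// actual filewriter implementation:
//
//	type TrecFileWriter struct {
//		StringChan chan *string
//		file       *os.File
//		wg         sync.WaitGroup
//	}
//
//	func (w *TrecFileWriter) Init(path string) {
//		f, err := os.Create(path)
//		if err != nil {
//			log.Fatalf("cannot create %s: %v", path, err)
//		}
//		w.file = f
//		w.StringChan = make(chan *string)
//		w.wg.Add(1)
//	}
//
//	func (w *TrecFileWriter) WriteAllTokens() {
//		defer w.wg.Done()
//		defer w.file.Close()
//		for s := range w.StringChan { // ends when the producer closes StringChan
//			fmt.Fprintln(w.file, *s)
//		}
//	}
//
//	func (w *TrecFileWriter) Wait() { w.wg.Wait() }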
// Run loads the token list, looks each token up in the database via a
// pool of tagger workers, and writes any tokens that were not found to
// disk.
func (a *run_tagger_action) Run() {
	SetupLogging(*a.verbosity)

	a.loadTokens()
	log.Debug("Tokens loaded")

	log.Debug("Filling connection pool")
	a.setupConnPool()

	taggers := new(tagger.Taggers)
	taggers.Init(a.connPool, a.workers)
	go taggers.Spawn()

	log.Debug("Tagging")

	// Queue each token for lookup. Indexing the slice gives every worker
	// a stable pointer instead of the address of a reused loop variable.
	for i := range a.tokens {
		taggers.Queue <- &a.tokens[i]
	}
	close(taggers.Queue)
	<-taggers.Done

	// Any tokens the taggers could not find in the database are written
	// out for later inspection.
	fw := new(filewriter.TrecFileWriter)
	fw.Init("/tmp/missing_tokens")
	go fw.WriteAllTokens()

	for i := range taggers.MissingTokens {
		fw.StringChan <- &taggers.MissingTokens[i]
	}
	close(fw.StringChan)
	fw.Wait()
}
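// Similarly, a hedged sketch of the worker pool assumed above: Spawn is
// taken to start a fixed number of goroutines that drain Queue, look each
// token up over a pooled connection, collect misses in MissingTokens, and
// close Done once every worker returns. Token, Conn, and Lookup are
// hypothetical stand-ins; only the Queue/Done/MissingTokens contract is
// taken from the code above:
//
//	type Taggers struct {
//		Queue         chan *Token
//		Done          chan struct{}
//		MissingTokens []string
//
//		mu      sync.Mutex
//		pool    chan *Conn // hypothetical pooled DB connections
//		workers int
//	}
//
//	func (t *Taggers) Spawn() {
//		var wg sync.WaitGroup
//		for i := 0; i < t.workers; i++ {
//			wg.Add(1)
//			go func() {
//				defer wg.Done()
//				conn := <-t.pool // borrow a connection
//				defer func() { t.pool <- conn }()
//				for tok := range t.Queue { // ends when Run closes Queue
//					if !conn.Lookup(tok.Text) { // hypothetical DB query
//						t.mu.Lock()
//						t.MissingTokens = append(t.MissingTokens, tok.Text)
//						t.mu.Unlock()
//					}
//				}
//			}()
//		}
//		wg.Wait()
//		close(t.Done) // lets Run's "<-taggers.Done" proceed
//	}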