// Function ProcessFile process the given file and incorporate the information // into the NGramGenerator g for future N-Gram model generation. func (g *NGramGenerator) ProcessFile(filename string) error { var decoder mahonia.Decoder if g.charset != "" { decoder = mahonia.NewDecoder(g.charset) } lineProcessor := func(line string) (bool, error) { line = strings.Trim(line, " \t\n\f\b\r") if decoder != nil { line = decoder.ConvertString(line) } tokens := strings.Split(line, " ") var prevToken string for i, t := range tokens { //Monogram frequency g.uniGram[t]++ g.uniGramCount++ //Bigram frequency var key BiGramKey if i == 0 { key = BiGramKey{SentenceStartTag, t} } else { key = BiGramKey{prevToken, t} } fmt.Printf("%v\n", key) g.biGram[key]++ g.biGramCount++ prevToken = t } return true, nil } return util.ForEachLineInFile(filename, lineProcessor) }