// Score a word in the context of the previous word. func score(current, previous string) float64 { if help.Length(previous) == 0 { unigramScore := corpus.Unigrams().ScoreForWord(current) if unigramScore > 0 { // Probability of the current word return unigramScore / corpus.Total() } else { // Penalize words not found in the unigrams according to their length return 10.0 / (corpus.Total() * math.Pow(10, float64(help.Length(current)))) } } else { // We've got a bigram unigramScore := corpus.Unigrams().ScoreForWord(previous) if unigramScore > 0 { bigramScore := corpus.Bigrams().ScoreForBigram(m.Bigram{previous, current, 0}) if bigramScore > 0 { // Conditional probability of the word given the previous // word. The technical name is 'stupid backoff' and it's // not a probability distribution return bigramScore / corpus.Total() / score(previous, "<s>") } } return score(current, "") } }
// Create multiple (prefix, suffix) pairs from a text. // The length of the prefix should not exceed the 'limit'. func divide(text string, limit int) <-chan m.Possibility { ch := make(chan m.Possibility) bound := help.Min(help.Length(text), limit) go func() { for i := 1; i <= bound; i++ { ch <- m.Possibility{Prefix: text[:i], Suffix: text[i:]} } close(ch) }() return ch }
// Search for the best arrangement for a text in the context of a previous phrase. func search(text, prev string) (ar m.Arrangement) { if help.Length(text) == 0 { return m.Arrangement{} } max := -10000000.0 // Find the best candidate by finding the best arrangement rating for a := range findCandidates(text, prev) { if a.Rating > max { max = a.Rating ar = a } } return }