コード例 #1
0
// score rates the word current given the word previous that precedes it.
//
// When previous is empty (help.Length reports 0) the result is based on the
// unigram data alone: the unigram score of current divided by corpus.Total()
// if that score is positive, otherwise a small penalty value that shrinks by
// a factor of ten for every unit of length of current.
//
// When previous is non-empty and both the unigram score of previous and the
// bigram score of (previous, current) are positive, the result is the bigram
// score divided by corpus.Total() and then by score(previous, "<s>"). This
// scheme is known as 'stupid backoff'; the values it yields do not form a
// probability distribution. In every other case the function backs off to
// the unigram-only rating of current.
func score(current, previous string) float64 {
	// No context word: rate current on its own.
	if help.Length(previous) == 0 {
		if freq := corpus.Unigrams().ScoreForWord(current); freq > 0 {
			// Relative frequency of the word in the corpus.
			return freq / corpus.Total()
		}
		// Word missing from the unigrams: penalize it according to its length.
		return 10.0 / (corpus.Total() * math.Pow(10, float64(help.Length(current))))
	}

	// A context word is present, so try the bigram first. The bigram is only
	// consulted when the context word itself has a positive unigram score.
	if corpus.Unigrams().ScoreForWord(previous) > 0 {
		pair := m.Bigram{previous, current, 0}
		if freq := corpus.Bigrams().ScoreForBigram(pair); freq > 0 {
			// NOTE(review): the divisor rates previous with "<s>" as its
			// own context word — presumably a sentence-start marker; confirm
			// that this is intended rather than a plain unigram rating.
			return freq / corpus.Total() / score(previous, "<s>")
		}
	}

	// Back off to the unigram-only rating.
	return score(current, "")
}
コード例 #2
0
// divide streams every (prefix, suffix) split of text whose prefix length
// runs from 1 up to the smaller of help.Length(text) and limit.
//
// The splits are produced by a background goroutine and delivered, shortest
// prefix first, on the returned unbuffered channel, which is closed once the
// last split has been sent. If the bound is below 1 the channel is closed
// without delivering anything.
//
// NOTE(review): the producer blocks on each send, so a caller that stops
// receiving early leaves the goroutine parked — confirm all callers drain
// the channel.
func divide(text string, limit int) <-chan m.Possibility {
	out := make(chan m.Possibility)
	longest := help.Min(help.Length(text), limit)

	produce := func() {
		for cut := 1; cut <= longest; cut++ {
			// NOTE(review): the slice expressions index text by byte; verify
			// that help.Length counts in the same unit.
			head, tail := text[:cut], text[cut:]
			out <- m.Possibility{Prefix: head, Suffix: tail}
		}
		close(out)
	}
	go produce()

	return out
}
コード例 #3
0
// search picks the highest-rated arrangement for text, given the preceding
// phrase prev.
//
// An empty text (help.Length reports 0) yields the zero-value Arrangement.
// Otherwise every candidate delivered by findCandidates(text, prev) is
// inspected and the one with the strictly greatest Rating is returned; on a
// tie the earliest candidate wins. A candidate is only accepted if its Rating
// exceeds -10000000.0, so when none does the zero-value Arrangement is
// returned as well.
func search(text, prev string) (ar m.Arrangement) {
	if help.Length(text) == 0 {
		return m.Arrangement{}
	}

	// Running maximum, seeded with a floor that any accepted candidate
	// must beat.
	best := -10000000.0
	for candidate := range findCandidates(text, prev) {
		if candidate.Rating > best {
			best, ar = candidate.Rating, candidate
		}
	}

	return ar
}