Beispiel #1
0
// TopNPerDoc filters words which are not in the top n in at least one document.
//
// For each of the c.M documents it tallies how often every word id occurs,
// asks util.TopN for n ids from those tallies, and records the corresponding
// vocabulary tokens as ones to keep. Every token in c.Vocab.Tokens that was
// never recorded is then handed to FilterStopwords as the stopword set, and
// the Corpus returned by FilterStopwords is the result.
func TopNPerDoc(c *Corpus, n int) *Corpus {
	keep := make(map[string]struct{})
	for d := 0; d < c.M; d++ {
		// Per-document occurrence count, indexed by word id.
		counts := make([]int, c.V)
		// The index is named i so that it does not shadow the parameter n.
		for i := 0; i < c.N[d]; i++ {
			counts[c.W[d][i]]++
		}
		for _, v := range util.TopN(util.IntCounts(counts), n) {
			keep[c.Vocab.Tokens[v]] = struct{}{}
		}
	}

	// The removal set is the complement of keep over the whole vocabulary.
	// Ranging over keep itself here would never find a missing token and
	// would leave the set empty, so nothing would be filtered.
	remove := make(map[string]bool)
	for _, token := range c.Vocab.Tokens {
		if _, ok := keep[token]; !ok {
			remove[token] = true
		}
	}

	return FilterStopwords(c, remove)
}
Beispiel #2
0
// TopicSummaryIds returns the n token ids that util.TopN selects from the
// zth row of i.TopicConstraints.
func (i *ITM) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(i.TopicConstraints[z])
	return util.TopN(counts, n)
}
Beispiel #3
0
// TopicSummary returns the string that util.TopNString builds from the zth
// row of b.TopicWords, the limit n, and the vocabulary tokens.
func (b *base) TopicSummary(z, n int) string {
	counts := util.IntCounts(b.TopicWords[z])
	return util.TopNString(counts, n, b.Vocab.Tokens)
}
Beispiel #4
0
// TopicSummaryIds returns the n word ids that util.TopN selects from the zth
// row of b.TopicWords.
func (b *base) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(b.TopicWords[z])
	return util.TopN(counts, n)
}
Beispiel #5
0
// TopicSummary returns a string describing the top n word tokens for the zth
// topic, built by util.TopNString from l.TopicWords[z] and the vocabulary.
func (l *LDA) TopicSummary(z, n int) string {
	counts := util.IntCounts(l.TopicWords[z])
	return util.TopNString(counts, n, l.Vocab.Tokens)
}
Beispiel #6
0
// TopicSummaryIds returns the ids of the top n word tokens for the zth topic,
// as selected by util.TopN from l.TopicWords[z].
func (l *LDA) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(l.TopicWords[z])
	return util.TopN(counts, n)
}