// TopNPerDoc filters words which are not in the top n in at least one document. func TopNPerDoc(c *Corpus, n int) *Corpus { keep := make(map[string]struct{}) for d := 0; d < c.M; d++ { counts := make([]int, c.V) for n := 0; n < c.N[d]; n++ { counts[c.W[d][n]]++ } for _, v := range util.TopN(util.IntCounts(counts), n) { keep[c.Vocab.Tokens[v]] = struct{}{} } } remove := make(map[string]bool) for token := range keep { if _, ok := keep[token]; !ok { remove[token] = true } } return FilterStopwords(c, remove) }
// TopicSummaryIds gets the top n tokens ids for the zth topic. func (i *ITM) TopicSummaryIds(z, n int) []int { return util.TopN(util.IntCounts(i.TopicConstraints[z]), n) }
// TopicSummary returns a string summarizing the top n words of the zth topic.
// The string is produced by util.TopNString from the topic's word counts and
// the vocabulary tokens.
func (b *base) TopicSummary(z, n int) string {
	counts := util.IntCounts(b.TopicWords[z])
	tokens := b.Vocab.Tokens
	return util.TopNString(counts, n, tokens)
}
// TopicSummaryIds returns the ids of the top n words for the zth topic, as
// selected by util.TopN from that topic's word counts.
func (b *base) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(b.TopicWords[z])
	return util.TopN(counts, n)
}
// TopicSummary gets top n word tokens for the zth topic. func (l *LDA) TopicSummary(z, n int) string { return util.TopNString(util.IntCounts(l.TopicWords[z]), n, l.Vocab.Tokens) }
// TopicSummaryIds gets the top n word token ids for the zth topic. func (l *LDA) TopicSummaryIds(z, n int) []int { return util.TopN(util.IntCounts(l.TopicWords[z]), n) }