func (i *ITM) SampleTopN(n int) { for _, d := range rand.Perm(i.M) { for w := 0; w < i.N[d]; w++ { i.unsetZ(d, w) allCounts := i.conditional(d, w) topCounts := make([]float64, i.T) for _, z := range util.TopN(util.FloatCounts(allCounts), n) { topCounts[z] = allCounts[z] } z := util.SampleCounts(topCounts) i.setZ(d, w, z) } } }
// TopNPerDoc filters words which are not in the top n in at least one document. func TopNPerDoc(c *Corpus, n int) *Corpus { keep := make(map[string]struct{}) for d := 0; d < c.M; d++ { counts := make([]int, c.V) for n := 0; n < c.N[d]; n++ { counts[c.W[d][n]]++ } for _, v := range util.TopN(util.IntCounts(counts), n) { keep[c.Vocab.Tokens[v]] = struct{}{} } } remove := make(map[string]bool) for token := range keep { if _, ok := keep[token]; !ok { remove[token] = true } } return FilterStopwords(c, remove) }
// TopicSummaryIds returns the n ids that util.TopN selects from the counts
// stored in b.TopicWords[z], i.e. the summary ids for the zth topic.
func (b *base) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(b.TopicWords[z])
	return util.TopN(counts, n)
}
// TopicSummaryIds gets the top n tokens ids for the zth topic. func (i *ITM) TopicSummaryIds(z, n int) []int { return util.TopN(util.IntCounts(i.TopicConstraints[z]), n) }
// TopicSummaryIds gets the top n word token ids for the zth topic. func (l *LDA) TopicSummaryIds(z, n int) []int { return util.TopN(util.IntCounts(l.TopicWords[z]), n) }