Exemple #1
0
// SampleTopN makes one sweep over every word position of every document,
// visiting the documents in a random order. For each position it clears the
// current assignment, computes the conditional for that position, zeroes out
// every entry except those selected by util.TopN with limit n, draws a new
// assignment from the remaining entries with util.SampleCounts, and stores it.
func (i *ITM) SampleTopN(n int) {
	for _, doc := range rand.Perm(i.M) {
		for pos := 0; pos < i.N[doc]; pos++ {
			i.unsetZ(doc, pos)

			// Copy over only the entries chosen by TopN; every other slot
			// keeps its zero value.
			full := i.conditional(doc, pos)
			restricted := make([]float64, i.T)
			for _, topic := range util.TopN(util.FloatCounts(full), n) {
				restricted[topic] = full[topic]
			}

			i.setZ(doc, pos, util.SampleCounts(restricted))
		}
	}
}
Exemple #2
0
// TopNPerDoc filters words which are not in the top n in at least one
// document.
//
// For every document the per-word occurrence counts are tallied and the word
// ids selected by util.TopN (with limit n) are marked as kept. Every vocabulary
// token that was never marked is then passed to FilterStopwords as a word to
// remove, and the corpus returned by FilterStopwords is the result.
func TopNPerDoc(c *Corpus, n int) *Corpus {
	// Collect the tokens that rank in the top n of at least one document.
	keep := make(map[string]struct{})
	for d := 0; d < c.M; d++ {
		counts := make([]int, c.V)
		for w := 0; w < c.N[d]; w++ {
			counts[c.W[d][w]]++
		}
		for _, v := range util.TopN(util.IntCounts(counts), n) {
			keep[c.Vocab.Tokens[v]] = struct{}{}
		}
	}

	// The removal set is the complement of keep with respect to the whole
	// vocabulary. It must be built by walking the vocabulary: walking keep
	// itself can never find a token that is missing from keep, which would
	// leave the removal set empty and the corpus unfiltered.
	remove := make(map[string]bool)
	for _, token := range c.Vocab.Tokens {
		if _, ok := keep[token]; !ok {
			remove[token] = true
		}
	}

	return FilterStopwords(c, remove)
}
Exemple #3
0
// TopicSummaryIds returns the ids selected by util.TopN, with limit n, from
// the zth row of b.TopicWords.
func (b *base) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(b.TopicWords[z])
	return util.TopN(counts, n)
}
Exemple #4
0
// TopicSummaryIds returns the top n token ids for the zth topic, as selected
// by util.TopN from the zth row of i.TopicConstraints.
func (i *ITM) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(i.TopicConstraints[z])
	return util.TopN(counts, n)
}
Exemple #5
0
// TopicSummaryIds returns the top n word token ids for the zth topic, as
// selected by util.TopN from the zth row of l.TopicWords.
func (l *LDA) TopicSummaryIds(z, n int) []int {
	counts := util.IntCounts(l.TopicWords[z])
	return util.TopN(counts, n)
}