func NewNaiveBayes(c *LabeledCorpus) *NaiveBayes { labelCounts := make([]float64, c.LabelIndex.Len()) wordCounts := make([][]float64, c.LabelIndex.Len()) for i := range wordCounts { wordCounts[i] = make([]float64, c.V) for v := 0; v < c.V; v++ { wordCounts[i][v]++ // smoothing } } for d := 0; d < c.M; d++ { label := c.LabelIndex.Id(c.Labels[d]) labelCounts[label]++ for n := 0; n < c.N[d]; n++ { wordCounts[label][c.W[d][n]]++ } } util.Normalize(labelCounts) util.Lcounts(labelCounts) for _, counts := range wordCounts { util.Normalize(counts) util.Lcounts(counts) } return &NaiveBayes{c.LabelIndex, labelCounts, wordCounts} }
func (i *ITM) Threshold(cutoff float64) { for _, d := range rand.Perm(i.M) { for n := 0; n < i.N[d]; n++ { i.unsetZ(d, n) counts := i.conditional(d, n) util.Normalize(counts) bestZ := -1 bestP := math.Inf(-1) useBest := true for z, count := range counts { if count > bestP { bestZ = z bestP = count } if count < cutoff { counts[z] = 0 } else { useBest = false } } if useBest { i.setZ(d, n, bestZ) } else { i.setZ(d, n, util.SampleCounts(counts)) } } } }
func FindAnchors(Q *matrix.DenseMatrix, k, r int) []int { QRed := Q.Copy() for _, row := range QRed.Arrays() { util.Normalize(row) } QRed = QRed.Transpose() QRed = RandomProjection(QRed, r) QRed = QRed.Transpose() return GramSchmidt(QRed, k) }