Example #1
func GetWordTopicDist(m *MGLDA, vocabulary []string, wt *bufio.Writer) {
	zGlCount := make([]int, m.GlobalK)
	zLocCount := make([]int, m.LocalK)
	wordGlCount := []map[int]int{}
	wordLocCount := []map[int]int{}
	for i := 0; i < m.GlobalK; i++ {
		wordGlCount = append(wordGlCount, map[int]int{})
	}
	for i := 0; i < m.LocalK; i++ {
		wordLocCount = append(wordLocCount, map[int]int{})
	}

	glog.Info("Get words distribution")
	for d, doc := range *m.Docs {
		for s, sent := range doc.Sentenses {
			for w, wd := range sent.Words {
				r := m.Rdsn[d][s][w]
				z := m.Zdsn[d][s][w]
				if r == globalTopic {
					zGlCount[z] += 1
					wordGlCount[z][wd] += 1
				} else {
					zLocCount[z] += 1
					wordLocCount[z][wd] += 1
				}
			}
		}
	}
	glog.Info("Done dist")
	phiGl, phiLoc := m.WordDist()
	for i := 0; i < m.GlobalK; i++ {
		header := fmt.Sprintf("-- global topic: %d (%d words)\n", i, zGlCount[i])
		wt.WriteString(header)
		glog.Info(header)
		rows := phiGl.RowCopy(i)
		// Argsort sorts rows in ascending order and fills idx with the
		// original word index of each sorted value; idx must be the same
		// length as rows.
		idx := make([]int, len(rows))
		floats.Argsort(rows, idx)
		// Walk idx from the back to emit the topicLimit highest-probability words.
		for j := len(idx) - 1; j >= len(idx)-topicLimit && j >= 0; j-- {
			w := idx[j]
			tp := fmt.Sprintf("%s: %f (%d)\n",
				vocabulary[w], phiGl.Get(i, w),
				wordGlCount[i][w])
			wt.WriteString(tp)
			glog.Info(tp)
		}
	}
	for i := 0; i < m.LocalK; i++ {
		header := fmt.Sprintf("-- local topic: %d (%d words)\n", i, zLocCount[i])
		wt.WriteString(header)
		glog.Info(header)
		rows := phiLoc.RowCopy(i)
		// Same selection pattern as for the global topics: ascending Argsort,
		// then read idx from the back for the top words.
		idx := make([]int, len(rows))
		floats.Argsort(rows, idx)
		for j := len(idx) - 1; j >= len(idx)-topicLimit && j >= 0; j-- {
			w := idx[j]
			tp := fmt.Sprintf("%s: %f (%d)\n",
				vocabulary[w], phiLoc.Get(i, w),
				wordLocCount[i][w])
			wt.WriteString(tp)
			glog.Info(tp)
		}
	}

}
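
The selection pattern above (ascending Argsort, then reading the index slice from the back) is the piece most worth isolating. The following is a minimal, self-contained sketch of just that pattern, with made-up probabilities and a hypothetical topicLimit of 3; it assumes the gonum floats package (imported here as gonum.org/v1/gonum/floats, whereas the example above may use the older github.com/gonum/floats path).

package main

import (
	"fmt"

	"gonum.org/v1/gonum/floats"
)

func main() {
	// Hypothetical per-word probabilities for one topic.
	phi := []float64{0.05, 0.40, 0.10, 0.30, 0.15}
	vocabulary := []string{"price", "service", "room", "food", "staff"}

	// Argsort sorts phi in ascending order and records in idx the original
	// position of each sorted element; idx must have the same length as phi.
	idx := make([]int, len(phi))
	floats.Argsort(phi, idx)

	// Walk idx from the back to list the top-3 words in descending order.
	topicLimit := 3
	for j := len(idx) - 1; j >= len(idx)-topicLimit; j-- {
		fmt.Printf("%s: %f\n", vocabulary[idx[j]], phi[j])
	}
}

Run as-is, this prints service, food and staff, i.e. the three highest-probability words in descending order.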
Example #2
/*
freeNSynapses frees up synapses in this segment. Inactive synapses are
always freed first (lowest permanence first) before any active ones.

numToFree              - number of synapses to free up
inactiveSynapseIndices - indices of the inactive synapses
*/
func (s *Segment) freeNSynapses(numToFree int, inactiveSynapseIndices []int) {
	//Make sure numToFree isn't larger than the total number of syns we have
	if numToFree > len(s.syns) {
		panic("Number to free cannot be larger than existing synapses.")
	}

	if s.tp.params.Verbosity >= 5 {
		fmt.Println("freeNSynapses with numToFree=", numToFree)
		fmt.Println("inactiveSynapseIndices= ", inactiveSynapseIndices)
	}

	var candidates []int
	// Remove the lowest perm inactive synapses first
	if len(inactiveSynapseIndices) > 0 {
		// Rank the inactive synapses by permanence. perms must line up
		// element-for-element with inactiveSynapseIndices so that the
		// Argsort output can be mapped back to synapse indices.
		perms := make([]float64, len(inactiveSynapseIndices))
		for i := range perms {
			perms[i] = s.syns[inactiveSynapseIndices[i]].Permanence
		}
		// Argsort requires indexes to have the same length as perms.
		indexes := make([]int, len(perms))
		floats.Argsort(perms, indexes)
		// Take the cSize lowest-permanence inactive synapses.
		cSize := mathutil.Min(numToFree, len(perms))
		candidates = make([]int, cSize)
		for i := 0; i < cSize; i++ {
			candidates[i] = inactiveSynapseIndices[indexes[i]]
		}
	}

	// Do we need more? if so, remove the lowest perm active synapses too
	var activeSynIndices []int
	if len(candidates) < numToFree {
		for i := 0; i < len(s.syns); i++ {
			if !utils.ContainsInt(i, inactiveSynapseIndices) {
				activeSynIndices = append(activeSynIndices, i)
			}
		}

		// Rank the remaining (active) synapses by permanence, again keeping
		// perms aligned with activeSynIndices.
		perms := make([]float64, len(activeSynIndices))
		for i := range perms {
			perms[i] = s.syns[activeSynIndices[i]].Permanence
		}
		indexes := make([]int, len(perms))
		floats.Argsort(perms, indexes)

		moreToFree := numToFree - len(candidates)
		for i := 0; i < moreToFree; i++ {
			candidates = append(candidates, activeSynIndices[indexes[i]])
		}
	}

	if s.tp.params.Verbosity >= 4 {
		fmt.Printf("Deleting %v synapses from segment to make room for new ones: %v \n",
			len(candidates), candidates)
		fmt.Println("Before:", s.ToString())
	}

	// Delete candidate syns by copying undeleted to new slice
	var newSyns []Synapse
	for idx, val := range s.syns {
		if !utils.ContainsInt(idx, candidates) {
			newSyns = append(newSyns, val)
		}
	}
	s.syns = newSyns

	if s.tp.params.Verbosity >= 4 {
		fmt.Println("After:", s.ToString())
	}

}
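
A subtlety worth calling out in freeNSynapses is that the permanence slice handed to Argsort has to line up element-for-element with the candidate index list, so that sorted positions can be mapped back to positions in s.syns. The sketch below isolates that mapping with a stand-in Synapse type and hypothetical values; it assumes the gonum floats package and is not part of the library's own API.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/floats"
)

// Synapse is a stand-in for the segment's synapse type used above
// (only the Permanence field matters here).
type Synapse struct {
	Permanence float64
}

// lowestPermanence returns up to n synapse indices drawn from subset,
// ordered by ascending permanence. subset holds indices into syns.
func lowestPermanence(syns []Synapse, subset []int, n int) []int {
	perms := make([]float64, len(subset))
	for i, synIdx := range subset {
		perms[i] = syns[synIdx].Permanence
	}
	order := make([]int, len(perms)) // must match len(perms) or Argsort panics
	floats.Argsort(perms, order)

	if n > len(subset) {
		n = len(subset)
	}
	out := make([]int, n)
	for i := 0; i < n; i++ {
		out[i] = subset[order[i]] // map back to positions in syns
	}
	return out
}

func main() {
	syns := []Synapse{{0.9}, {0.2}, {0.7}, {0.1}}
	inactive := []int{1, 2, 3}
	fmt.Println(lowestPermanence(syns, inactive, 2)) // [3 1]
}

With these inputs the subset {1, 2, 3} has permanences {0.2, 0.7, 0.1}, so the two lowest-permanence synapses are 3 and 1 and the program prints [3 1].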
Example #3
// Inference performs one Gibbs sampling sweep, resampling the window v,
// topic kind r, and topic z for every word of every document.
func (m *MGLDA) Inference() {
	for d, doc := range *m.Docs {
		for s, sent := range doc.Sentenses {
			for w, wd := range sent.Words {
				v := m.Vdsn[d][s][w]
				r := m.Rdsn[d][s][w]
				z := m.Zdsn[d][s][w]

				if r == globalTopic {
					m.Nglzw.Set(z, wd, m.Nglzw.Get(z, wd)-1)
					m.Nglz.Set(z, 0, m.Nglz.Get(z, 0)-1)
					m.Ndvgl[d][s+v] -= 1
					m.Ndglz.Set(d, z, m.Ndglz.Get(d, z)-1)
					m.Ndgl.Set(d, 0, m.Ndgl.Get(d, 0)-1)
				} else {
					m.Nloczw.Set(z, wd, m.Nloczw.Get(z, wd)-1)
					m.Nlocz.Set(z, 0, m.Nlocz.Get(z, 0)-1)
					m.Ndvloc[d][s+v] -= 1
					m.Ndvlocz[d][s+v][z] -= 1
				}
				m.Ndsv[d][s][v] -= 1
				m.Nds[d][s] -= 1
				m.Ndv[d][s+v] -= 1

				pvrz := []float64{}
				newVs := []int{}
				newRs := []string{}
				newZs := []int{}
				for vt := 0; vt < m.T; vt++ {
					for zt := 0; zt < m.GlobalK; zt++ {
						newVs = append(newVs, vt)
						newRs = append(newRs, globalTopic)
						newZs = append(newZs, zt)
						term1 := (m.Nglzw.Get(zt, wd) + m.GlobalBeta) / (m.Nglz.Get(zt, 0) + float64(m.W)*m.GlobalBeta)
						term2 := (m.Ndsv[d][s][vt] + m.Gamma) / (m.Nds[d][s] + float64(m.T)*m.Gamma)
						term3 := (m.Ndvgl[d][s+vt] + m.GlobalAlphaMix) / (m.Ndv[d][s+vt] + m.GlobalAlphaMix + m.LocalAlphaMix)
						term4 := (m.Ndglz.Get(d, zt) + m.GlobalAlpha) / (m.Ndgl.Get(d, 0) + float64(m.GlobalK)*m.GlobalAlpha)
						pvrz = append(pvrz, term1*term2*term3*term4)

					}
					for zt := 0; zt < m.LocalK; zt++ {
						newVs = append(newVs, vt)
						newRs = append(newRs, localTopic)
						newZs = append(newZs, zt)
						term1 := (m.Nloczw.Get(zt, wd) + m.LocalBeta) / (m.Nlocz.Get(zt, 0) + float64(m.W)*m.LocalBeta)
						term2 := (m.Ndsv[d][s][vt] + m.Gamma) / (m.Nds[d][s] + float64(m.T)*m.Gamma)
						term3 := (m.Ndvloc[d][s+vt] + m.LocalAlphaMix) / (m.Ndv[d][s+vt] + m.GlobalAlphaMix + m.LocalAlphaMix)
						term4 := (m.Ndvlocz[d][s+vt][zt] + m.LocalAlpha) / (m.Ndvloc[d][s+vt] + float64(m.LocalK)*m.LocalAlpha)
						pvrz = append(pvrz, term1*term2*term3*term4)
					}
				}

				// Sample a new (v, r, z) assignment from the unnormalized
				// distribution pvrz. Argsort orders the probabilities in
				// ascending order while origIdx records which (v, r, z)
				// combination each sorted value came from.
				origIdx := make([]int, len(pvrz))
				var sum float64
				for _, item := range pvrz {
					sum += item
				}
				floats.Argsort(pvrz, origIdx)

				var randIdx int
				idxCount := map[int]int{}
				for i := 0; i < 100; i++ {
					var partialSum float64
					threshold := rand.Float64()
					for j := len(pvrz) - 1; j >= 0; j-- {
						partialSum += pvrz[j] / sum
						if partialSum >= threshold {
							idxCount[origIdx[j]] += 1
							break
						}
					}
				}
				// Keep the assignment drawn most often across the 100 draws.
				var maxCount int
				for idx, cnt := range idxCount {
					if cnt > maxCount {
						maxCount = cnt
						randIdx = idx
					}
				}
				newV := newVs[randIdx]
				newR := newRs[randIdx]
				newZ := newZs[randIdx]
				// update
				if newR == globalTopic {
					m.Nglzw.Set(newZ, wd, m.Nglzw.Get(newZ, wd)+1)
					m.Nglz.Set(newZ, 0, m.Nglz.Get(newZ, 0)+1)
					m.Ndvgl[d][s+newV] += 1
					m.Ndglz.Set(d, newZ, m.Ndglz.Get(d, newZ)+1)
					m.Ndgl.Set(d, 0, m.Ndgl.Get(d, 0)+1)
				} else {
					m.Nloczw.Set(newZ, wd, m.Nloczw.Get(newZ, wd)+1)
					m.Nlocz.Set(newZ, 0, m.Nlocz.Get(newZ, 0)+1)
					m.Ndvloc[d][s+newV] += 1
					m.Ndvlocz[d][s+newV][newZ] += 1
				}
				m.Ndsv[d][s][newV] += 1
				m.Nds[d][s] += 1
				m.Ndv[d][s+newV] += 1

				m.Vdsn[d][s][w] = newV
				m.Rdsn[d][s][w] = newR
				m.Zdsn[d][s][w] = newZ
			}
		}
	}
}
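
The sampling block in the middle of Inference draws a (v, r, z) assignment by normalizing pvrz and comparing a running sum against a uniform threshold, then keeps the mode of 100 such draws. A single categorical draw (a multinomial with one trial) can be sketched on its own as below; the weights are hypothetical and the helper name sampleIndex does not appear in the code above.

package main

import (
	"fmt"
	"math/rand"
)

// sampleIndex draws one index from the unnormalized weights, i.e. a
// single draw from a categorical distribution.
func sampleIndex(weights []float64) int {
	var sum float64
	for _, w := range weights {
		sum += w
	}
	threshold := rand.Float64()
	var partial float64
	for i, w := range weights {
		partial += w / sum
		if partial >= threshold {
			return i
		}
	}
	return len(weights) - 1 // guard against floating-point rounding
}

func main() {
	// Hypothetical unnormalized p(v, r, z) values.
	pvrz := []float64{0.1, 2.5, 0.4, 1.0}
	counts := make([]int, len(pvrz))
	for i := 0; i < 1000; i++ {
		counts[sampleIndex(pvrz)]++
	}
	fmt.Println(counts) // index 1 should dominate
}

Repeating the draw, as main does, shows the counts concentrating on the largest weight, which is the behaviour the mode-of-100-draws heuristic above relies on.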