func main() { sumR := 0.0 sum3 := 0.0 sum5 := 0.0 sum10 := 0.0 for _, importer := range load.Ambiant { corpus := importer.Import() dpmom := cluster.NewDPMoM(corpus, 1, 20, .2, .2) graphical.RunCCM(dpmom, cluster.NewConvergenceCheck(dpmom.Z), 0) gold := importer.Label(corpus) pred := eval.NewClusteringState(dpmom.Z) cont := eval.NewContingency(gold, pred) rerank := eval.Rerank(dpmom.Z) sumR += cont.Rand() sum3 += eval.SRecallK(3, rerank, gold) sum5 += eval.SRecallK(5, rerank, gold) sum10 += eval.SRecallK(10, rerank, gold) } n := float64(len(load.Ambiant)) fmt.Println("Rand :", sumR/n) fmt.Println("SR@3 :", sum3/n) fmt.Println("SR@5 :", sum5/n) fmt.Println("SR@10:", sum10/n) }
func main() { rand.Seed(time.Now().UnixNano()) corpus := load.Newsgroups.Import() gold := load.Newsgroups.Label(corpus) mom := cluster.NewMoM(corpus, 20, .1, .01) checker := cluster.NewConvergenceCheck(mom.Z) converged := false var dur time.Duration var iter int fmt.Println("iter\ttime\tchanges\tf-measure\tARI\tVI") for !converged { start := time.Now() mom.CCM() end := time.Now() dur += end.Sub(start) iter += 1 pred := eval.NewClusteringState(mom.Z) cont := eval.NewContingency(gold, pred) changes := checker.Check() line := []string{ fmt.Sprintf("%d", iter), fmt.Sprintf("%.3f", dur.Seconds()), fmt.Sprintf("%d", changes), fmt.Sprintf("%.3f", cont.FMeasure()), fmt.Sprintf("%.3f", cont.ARI()), fmt.Sprintf("%.3f", cont.VI())} fmt.Println(strings.Join(line, "\t")) converged = changes == 0 } fmt.Println() for z := 0; z < mom.T; z++ { fmt.Printf("%d: %s\n", z, mom.TopicSummary(z, 10)) } }
func MMCCM(c *pipeline.Corpus) []int { mom := MOM(c) graphical.RunCCM(mom, cluster.NewConvergenceCheck(mom.Z), 0) return mom.Z }
func DPCCM(c *pipeline.Corpus) []int { dpmom := DPMOM(c) graphical.RunCCM(dpmom, cluster.NewConvergenceCheck(dpmom.Z), 100) return dpmom.Z }