Пример #1
0
// main runs a DPMoM clustering over every importer in load.Ambiant, scores each
// result against the importer's labels, and prints the per-metric averages
// (cont.Rand and eval.SRecallK at 3, 5 and 10) across all importers.
func main() {
	// Running totals, one per reported metric.
	var total struct {
		randSum, sr3, sr5, sr10 float64
	}

	for _, src := range load.Ambiant {
		docs := src.Import()

		model := cluster.NewDPMoM(docs, 1, 20, .2, .2)
		check := cluster.NewConvergenceCheck(model.Z)
		graphical.RunCCM(model, check, 0)

		truth := src.Label(docs)
		table := eval.NewContingency(truth, eval.NewClusteringState(model.Z))
		ranking := eval.Rerank(model.Z)

		total.randSum += table.Rand()
		total.sr3 += eval.SRecallK(3, ranking, truth)
		total.sr5 += eval.SRecallK(5, ranking, truth)
		total.sr10 += eval.SRecallK(10, ranking, truth)
	}

	// Every total is averaged over the number of importers processed.
	count := float64(len(load.Ambiant))
	report := []struct {
		label string
		sum   float64
	}{
		{"Rand :", total.randSum},
		{"SR@3 :", total.sr3},
		{"SR@5 :", total.sr5},
		{"SR@10:", total.sr10},
	}
	for _, row := range report {
		fmt.Println(row.label, row.sum/count)
	}
}
Пример #2
0
// EvalQuery imports the corpus behind importer, clusters it by calling r, and
// scores the resulting assignment against the importer's labels.
//
// The returned Stats holds the contingency table's Rand() value in Rand and,
// for each entry of Ks, the eval.SRecallK result at the same index of Recall.
func (r Runner) EvalQuery(importer load.Importer) Stats {
	docs := importer.Import()
	truth := importer.Label(docs)

	assignment := r(docs)

	table := eval.NewContingency(truth, eval.NewClusteringState(assignment))
	ranking := eval.Rerank(assignment)

	var result Stats
	result.Rand = table.Rand()
	for idx, cutoff := range Ks {
		result.Recall[idx] = eval.SRecallK(cutoff, ranking, truth)
	}
	return result
}
Пример #3
0
// main builds a MoM model over the Newsgroups corpus and calls its CCM method
// repeatedly, printing one tab-separated progress row per pass, until the
// convergence checker reports zero changes. It then prints a summary line for
// every topic index below mom.T.
func main() {
	rand.Seed(time.Now().UnixNano())

	docs := load.Newsgroups.Import()
	truth := load.Newsgroups.Label(docs)

	model := cluster.NewMoM(docs, 20, .1, .01)
	tracker := cluster.NewConvergenceCheck(model.Z)

	var (
		elapsed time.Duration // accumulated time spent inside model.CCM only
		pass    int
	)

	fmt.Println("iter\ttime\tchanges\tf-measure\tARI\tVI")

	for {
		// Only the CCM call itself is timed; evaluation below is excluded.
		began := time.Now()
		model.CCM()
		elapsed += time.Since(began)
		pass++

		table := eval.NewContingency(truth, eval.NewClusteringState(model.Z))
		moved := tracker.Check()

		cols := make([]string, 0, 6)
		cols = append(cols,
			fmt.Sprintf("%d", pass),
			fmt.Sprintf("%.3f", elapsed.Seconds()),
			fmt.Sprintf("%d", moved),
			fmt.Sprintf("%.3f", table.FMeasure()),
			fmt.Sprintf("%.3f", table.ARI()),
			fmt.Sprintf("%.3f", table.VI()),
		)
		fmt.Println(strings.Join(cols, "\t"))

		// Stop once a pass reports no changes.
		if moved == 0 {
			break
		}
	}

	fmt.Println()
	for topic := 0; topic < model.T; topic++ {
		fmt.Printf("%d: %s\n", topic, model.TopicSummary(topic, 10))
	}
}