Beispiel #1
0
// TestShadows runs every entry of shadowTests: it derives segregations from
// the entry's cluster centers with SegregationsFromCenters, feeds them to
// Silhouettes together with the entry's classes, and compares the result
// against the entry's expected shadows.
func TestShadows(t *testing.T) {
	for i, test := range shadowTests {
		seg := SegregationsFromCenters(test.x, test.centers, test.metric)
		shadows := Silhouettes(seg, test.classes)
		if !mlgo.Vector(test.shadows).Equal(mlgo.Vector(shadows)) {
			// Name the function that is actually exercised so that a failure
			// points at the right code.
			t.Errorf("#%d Silhouettes(SegregationsFromCenters(...), ...) got %v, want %v", i, shadows, test.shadows)
		}
	}
}
Beispiel #2
0
// TestSilhouettes runs every entry of silhouetteTests: it builds distances
// with NewDistances, turns them into segregations for the entry's classes,
// and checks that Silhouettes returns the entry's expected values.
func TestSilhouettes(t *testing.T) {
	for n, tc := range silhouetteTests {
		dist := NewDistances(tc.x, tc.metric)
		seg := Segregations(dist, tc.classes)
		got := Silhouettes(seg, tc.classes)
		want := tc.silhouettes

		if mlgo.Vector(want).Equal(mlgo.Vector(got)) {
			continue
		}
		t.Errorf("#%d Silhouettes(Segregations(...), ...) got %v, want %v", n, got, want)
	}
}
Beispiel #3
0
// SegregateByMeanSil tries every cluster count k from 2 up to K and keeps the
// clustering whose mean silhouette is largest.
//
// A K that is non-positive or larger than seg.Len()-1 is replaced by
// seg.Len()-1. The returned Split holds the chosen cluster count in K (0 when
// no candidate was accepted), one minus the best mean silhouette in Cost, and
// the corresponding classes in Cl (nil when no candidate was accepted).
//
// TODO Do not count the silhouette of singleton clusters in the average?
func SegregateByMeanSil(seg Segregator, K int) (s Split) {
	// the silhouette is only defined for 2 <= k <= m - 1, where m = seg.Len()
	limit := seg.Len() - 1
	if K > 0 && K <= limit {
		limit = K
	}

	// track the candidate with the highest mean silhouette seen so far
	var (
		bestClasses *Classes
		bestK       int
		bestSil     = -1.0
	)
	for k := 2; k <= limit; k++ {
		cl := seg.Cluster(k)
		segs := seg.Segregations(cl)
		mean := mlgo.Vector(Silhouettes(segs, cl)).Mean()
		if mean > bestSil {
			bestClasses, bestK, bestSil = cl, k, mean
		}
	}

	s = Split{
		K:    bestK,
		Cost: 1 - bestSil,
		Cl:   bestClasses,
	}
	return s
}
Beispiel #4
0
// SplitByMeanSplitSil tries every cluster count k from 1 up to K and keeps the
// clustering whose mean split silhouette is smallest.
//
// K is the maximum number of clusters.
// L is the maximum number of children clusters for any cluster.
//
// For each k, every cluster is passed to SegregateByMeanSil (with L as its
// limit); clusters that could be split contribute their best mean silhouette,
// and the average of those contributions is the mean split silhouette for k.
//
// The returned Split holds the chosen cluster count in K (0 when no candidate
// was accepted), the smallest mean split silhouette in Cost (+Inf when no
// candidate was accepted), and the corresponding classes in Cl.
func SplitByMeanSplitSil(splitter Splitter, K, L int) (s Split) {
	m := splitter.Len()

	// the average split silhouette can only be calculated for 1 <= k <= m/3:
	// if k > m/3, at least one cluster would have < 3 elements, and each
	// cluster needs >= 3 elements to be further split into at least 2 clusters
	// for silhouette calculation
	if K <= 0 || K > m/3 {
		K = m / 3
	}

	// minimize the mean split silhouette
	avgSplitSil := math.Inf(1)
	optK := 0
	var optClasses *Classes
	for k := 1; k <= K; k++ {
		classes := splitter.Cluster(k)
		partitions := classes.Partitions()

		// Collect with append rather than indexing a length-k buffer: the loop
		// is bounded by classes.K, which need not equal the requested k.
		splitSil := make(Vector, 0, k)
		for kk := 0; kk < classes.K; kk++ {
			clustSplit := SegregateByMeanSil(splitter.Subset(partitions[kk]), L)
			if clustSplit.K > 0 {
				// cluster could be split further into children clusters
				splitSil = append(splitSil, 1-clustSplit.Cost)
			}
		}

		// No cluster could be split further, so there is nothing to average;
		// this k is not a candidate.
		if len(splitSil) == 0 {
			continue
		}

		t := mlgo.Vector(splitSil).Mean()
		if t < avgSplitSil {
			avgSplitSil = t
			optK = k
			optClasses = classes
		}
	}

	s.K = optK
	s.Cost = avgSplitSil
	s.Cl = optClasses
	return s
}