func TestShadows(t *testing.T) { for i, test := range shadowTests { S := SegregationsFromCenters(test.x, test.centers, test.metric) shadows := Silhouettes(S, test.classes) if !mlgo.Vector(test.shadows).Equal(mlgo.Vector(shadows)) { t.Errorf("#%d Silhouettes(Separations(...), ...) got %v, want %v", i, shadows, test.shadows) } } }
func TestSilhouettes(t *testing.T) { for i, test := range silhouetteTests { d := NewDistances(test.x, test.metric) sil := Silhouettes(Segregations(d, test.classes), test.classes) if !mlgo.Vector(test.silhouettes).Equal(mlgo.Vector(sil)) { t.Errorf("#%d Silhouettes(Segregations(...), ...) got %v, want %v", i, sil, test.silhouettes) } } }
// TODO Do not count the silhouette of singleton clusters in the average? func SegregateByMeanSil(seg Segregator, K int) (s Split) { m := seg.Len() // silhouette can only be calculated for 2 <= k <= m - 1 if K <= 0 || K > m-1 { K = m - 1 } // maximize average silhouette avgSil := -1.0 optK := 0 var optClasses *Classes for k := 2; k <= K; k++ { classes := seg.Cluster(k) sil := Silhouettes(seg.Segregations(classes), classes) t := mlgo.Vector(sil).Mean() if t > avgSil { avgSil = t optK = k optClasses = classes } } s.K = optK s.Cost = 1 - avgSil s.Cl = optClasses return }
// K is the maximum number of clusters. // L is the maximum number of children clusters for any cluster. func SplitByMeanSplitSil(splitter Splitter, K, L int) (s Split) { m := splitter.Len() // average split silhouette can be only be calculated for 1 <= k <= m/3 // if k > m/3, at least one cluster would have < 3 elements // each cluster needs >= 3 elements to be further split into at least 2 clusters // for silhouette calculation if K <= 0 || K > m/3 { K = m / 3 } // minimize the mean split silhouette avgSplitSil := math.Inf(1) optK := 0 var optClasses *Classes for k := 1; k <= K; k++ { splitSil := make(Vector, k) classes := splitter.Cluster(k) partitions := classes.Partitions() n := 0 for kk := 0; kk < classes.K; kk++ { clustSplit := SegregateByMeanSil(splitter.Subset(partitions[kk]), L) if clustSplit.K > 0 { // cluster could be split further into children clusters splitSil[n] = 1 - clustSplit.Cost n++ } } // remove empty elements at end to account for clusters that could be not split further splitSil = splitSil[:n] t := mlgo.Vector(splitSil).Mean() //fmt.Println(k, t, splitSil, classes) if t < avgSplitSil { avgSplitSil = t optK = k optClasses = classes } } s.K = optK s.Cost = avgSplitSil s.Cl = optClasses return }