func TestFilterSmallClusters(t *testing.T) { g := FilterSmallClusters([]*clustering.Cluster{}, 5) if g == nil { t.Errorf("result should not be nil") } if len(g) != 0 { t.Errorf("zero group input should result in zero group output") } testSet := []*clustering.Cluster{ clustering.NewCluster(&event{Location: geo.NewPoint(1, 1)}), clustering.NewCluster(&event{Location: geo.NewPoint(1, 1)}, &event{Location: geo.NewPoint(2, 2)}), } g = FilterSmallClusters(testSet, 5) if l := len(g); l != 0 { t.Errorf("should filter out small groups, but got %d", l) } if l := len(testSet); l != 2 { t.Errorf("should not change test set, but got length %d", l) } g = FilterSmallClusters(testSet, 2) if l := len(g); l != 1 { t.Errorf("should filter out small groups, but got %d", l) } if l := len(testSet); l != 2 { t.Errorf("should not change test set, but got length %d", l) } }
// RemoveOutlierPointersByQuadkey will bucket all pointers by quad key (defined by the level) // and remove the buckets with less than threshold pointers. The buckets become the resulting point_clustering.Clusters. func RemoveOutlierPointersByQuadkey(pointers []geo.Pointer, level, threshold int) []*clustering.Cluster { buckets := make(map[int64][]geo.Pointer) for _, p := range pointers { key := p.Point().Quadkey(level) buckets[key] = append(buckets[key], p) } clusters := make([]*clustering.Cluster, 0, len(buckets)) for _, b := range buckets { if len(b) >= threshold { clusters = append(clusters, clustering.NewCluster(b...)) } } return clusters }
// RematchPointersToClusters will take a set of pointers and map them to the closest cluster. // Basically creates a new cluster from that one point and does the ClusterDistance between them. // Will return a new list. func RematchPointersToClusters( clusters []*clustering.Cluster, pointers []geo.Pointer, distancer clustering.ClusterDistancer, threshold float64, ) []*clustering.Cluster { if len(clusters) == 0 { return []*clustering.Cluster{} } newClusters := make([]*clustering.Cluster, 0, len(clusters)) // clear the current members for _, c := range clusters { newClusters = append(newClusters, clustering.NewClusterWithCentroid(c.Centroid)) } // remap all the groupers to these new groups for _, pointer := range pointers { minDist := math.MaxFloat64 index := 0 pointerCluster := clustering.NewCluster(pointer) // find the closest group for i, c := range newClusters { if d := distancer.ClusterDistance(c, pointerCluster); d < minDist { minDist = d index = i } } if minDist < threshold { // leaves the center as found by the previous clustering newClusters[index].Pointers = append(newClusters[index].Pointers, pointer) } } return newClusters }