// Xmeans runs k-means for k lower bound to k upper bound on a data set. // Once the k centroids have converged each cluster is bisected and the BIC // of the orginal cluster (parent = a model with one centroid) to the // the bisected model which consists of two centroids and whichever is greater // is committed to the set of clusters for this larger model k. // func Xmeans(datapoints, centroids *matrix.DenseMatrix, kmax int, cc, bisectcc CentroidChooser, measurer VectorMeasurer) ([]Model, map[string]error) { logname := "/var/tmp/xmeans.log" fp, err := os.OpenFile(logname, os.O_RDWR|os.O_APPEND, 0666) if err != nil { if os.IsNotExist(err) { fp, err = os.Create(logname) if err != nil { fmt.Printf("Xmeans: cannot open %s for logging.\n", logname) } } } log.SetOutput(io.Writer(fp)) k, _ := centroids.GetSize() log.Printf("Start k=%d kmax=%d\n", k, kmax) R, M := datapoints.GetSize() errs := make(map[string]error) runtime.GOMAXPROCS(numworkers) models := make([]Model, 0) for k <= kmax { log.Printf("kmeans started k=%d\n", k) model, err := kmeans(datapoints, centroids, measurer) if err != nil { errs[strconv.Itoa(k)] = err } // Bisect the returned clusters log.Println("bisect started") bimodel := bisect(model.Clusters, R, M, bisectcc, measurer) numCentroids := len(bimodel.Clusters) log.Printf("bisect returned %d clusters\n", numCentroids) models = append(models, model) var cent *matrix.DenseMatrix if numCentroids <= kmax { for rowexists := true; rowexists == true; { cent = cc.ChooseCentroids(datapoints, 1) rowexists = centroids.RowExists(cent) } centroids, err = centroids.AppendRow(cent) if err != nil { log.Printf("AppendRow: %v\n", err) errs["ApppendRow"] = err break } k++ } else { k = numCentroids } } log.Println("Finished") return models, errs }
// Xmeans runs k-means for k lower bound to k upper bound on a data set. // Once the k centroids have converged each cluster is bisected and the BIC // of the orginal cluster (parent = a model with one centroid) to the // the bisected model which consists of two centroids and whichever is greater // is committed to the set of clusters for this larger model k. // func Xmeans(datapoints, centroids *matrix.DenseMatrix, k, kmax int, cc, bisectcc CentroidChooser, measurer VectorMeasurer) ([]Model, map[string]error) { var err error // Uncomment logging code as well as the import statement above if you want simple logging to the elapsed // time between major events. /* logname := "/var/tmp/xmeans.log" fp, err := os.OpenFile(logname, os.O_RDWR|os.O_APPEND, 0666) if err != nil { if os.IsNotExist(err) { fp, err = os.Create(logname) if err != nil { fmt.Printf("Xmeans: cannot open %s for logging.\n", logname) } } } log.SetOutput(io.Writer(fp)) */ if k > kmax { m := make([]Model, 0) e := map[string]error{ "k": errors.New(fmt.Sprintf("k must be <= kmax. Received k=%d and kmax=%d.", k, kmax)), } return m, e } // log.Printf("Start k=%d kmax=%d\n", k, kmax) R, M := datapoints.GetSize() errs := make(map[string]error) runtime.GOMAXPROCS(numworkers) models := make([]Model, 0) for k <= kmax { // log.Printf("kmeans started k=%d\n", k) model := kmeans(datapoints, centroids, measurer) // Bisect the returned clusters // log.Println("bisect started") bimodel := bisect(model.Clusters, R, M, bisectcc, measurer) numCentroids := len(bimodel.Clusters) // log.Printf("bisect returned %d clusters\n", numCentroids) models = append(models, model) var cent *matrix.DenseMatrix if numCentroids <= kmax { for rowexists := true; rowexists == true; { cent = cc.ChooseCentroids(datapoints, 1) rowexists = centroids.RowExists(cent) } centroids, err = centroids.AppendRow(cent) if err != nil { errs["ApppendRow"] = err break } k++ } else { k = numCentroids } } // log.Println("Finished") return models, errs }