Ejemplo n.º 1
0
// Xmeans runs k-means repeatedly on datapoints, starting with k equal to the
// number of rows in centroids and stopping once k exceeds kmax.
//
// On every pass it runs kmeans with the current centroids, records the
// returned model, and bisects that model's clusters. Only the number of
// clusters in the bisected model is used here:
//
//   - if that count is <= kmax, one new centroid that is not already a row of
//     centroids is picked with cc, appended, and k is incremented;
//   - otherwise k is set to that count, which ends the loop.
//
// The original description states that bisect compares the BIC of each parent
// cluster with the BIC of its two-centroid split; that comparison is not
// visible in this function (presumably it lives in bisect).
//
// Progress is logged through the standard logger, which is redirected to
// /var/tmp/xmeans.log for the duration of the call. If that file cannot be
// opened a message is printed and the logger is left untouched.
//
// It returns the models in the order they were produced and a map of errors:
// kmeans failures are keyed by the decimal value of k, and an AppendRow
// failure is stored under "ApppendRow".
func Xmeans(datapoints, centroids *matrix.DenseMatrix, kmax int, cc, bisectcc CentroidChooser, measurer VectorMeasurer) ([]Model, map[string]error) {
	logname := "/var/tmp/xmeans.log"

	// O_CREATE folds the former "open, and create if missing" sequence into a
	// single call, so a newly created file is also opened in append mode.
	fp, err := os.OpenFile(logname, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0666)
	if err != nil {
		// Logging is best effort: report the problem and keep the current
		// logger output rather than installing a nil *os.File as the writer.
		fmt.Printf("Xmeans: cannot open %s for logging.\n", logname)
	} else {
		prev := log.Writer()
		log.SetOutput(io.Writer(fp))
		defer func() {
			// Restore the logger before closing so nothing writes to a
			// closed file after Xmeans returns.
			log.SetOutput(prev)
			// The close error is ignored: the file only holds diagnostics.
			_ = fp.Close()
		}()
	}

	k, _ := centroids.GetSize()
	log.Printf("Start k=%d kmax=%d\n", k, kmax)

	R, M := datapoints.GetSize()
	errs := make(map[string]error)
	runtime.GOMAXPROCS(numworkers)
	models := make([]Model, 0)

	for k <= kmax {
		log.Printf("kmeans started k=%d\n", k)
		model, err := kmeans(datapoints, centroids, measurer)
		if err != nil {
			// The failure is recorded under the current k and the pass
			// continues with whatever model kmeans returned.
			// NOTE(review): confirm that model is usable when err != nil.
			errs[strconv.Itoa(k)] = err
		}

		// Bisect the returned clusters
		log.Println("bisect started")
		bimodel := bisect(model.Clusters, R, M, bisectcc, measurer)
		numCentroids := len(bimodel.Clusters)
		log.Printf("bisect returned %d clusters\n", numCentroids)
		models = append(models, model)

		if numCentroids > kmax {
			// Jumping k past kmax terminates the loop.
			k = numCentroids
			continue
		}

		// Keep drawing until the chosen centroid is not already present.
		// NOTE(review): this never terminates if cc can only return rows
		// that already exist in centroids.
		var cent *matrix.DenseMatrix
		for {
			cent = cc.ChooseCentroids(datapoints, 1)
			if !centroids.RowExists(cent) {
				break
			}
		}

		centroids, err = centroids.AppendRow(cent)
		if err != nil {
			log.Printf("AppendRow: %v\n", err)
			// The key spelling is kept as-is; callers may look it up.
			errs["ApppendRow"] = err
			break
		}
		k++
	}

	log.Println("Finished")
	return models, errs
}
Ejemplo n.º 2
0
// Xmeans fits a series of k-means models to datapoints, beginning with the
// supplied k and centroids and continuing while k <= kmax.
//
// Each pass runs kmeans with the current centroids, stores the resulting
// model, and bisects that model's clusters. The size of the bisected model
// decides the next step: when it is <= kmax, a single fresh centroid (one that
// is not already a row of centroids) is chosen with cc and appended, and k
// grows by one; when it is larger, k is set to that size and the loop ends.
//
// The returned slice holds the models in the order they were produced. The
// returned map holds errors: key "k" when k > kmax on entry (no models are
// produced in that case), and key "ApppendRow" when appending a centroid
// fails.
func Xmeans(datapoints, centroids *matrix.DenseMatrix, k, kmax int, cc, bisectcc CentroidChooser, measurer VectorMeasurer) ([]Model, map[string]error) {
	// Simple logging of the elapsed time between major events is available by
	// uncommenting the code below, the log.* lines further down, and the
	// matching import statements.
	//
	//	logname := "/var/tmp/xmeans.log"
	//	fp, err := os.OpenFile(logname, os.O_RDWR|os.O_APPEND, 0666)
	//	if err != nil {
	//		if os.IsNotExist(err) {
	//			fp, err = os.Create(logname)
	//			if err != nil {
	//				fmt.Printf("Xmeans: cannot open %s for logging.\n", logname)
	//			}
	//		}
	//	}
	//
	//	log.SetOutput(io.Writer(fp))

	// Reject an inverted range up front.
	if k > kmax {
		return make([]Model, 0), map[string]error{
			"k": errors.New(fmt.Sprintf("k must be <= kmax.  Received k=%d and kmax=%d.", k, kmax)),
		}
	}

	//	log.Printf("Start k=%d kmax=%d\n", k, kmax)

	dataRows, dataCols := datapoints.GetSize()
	failures := make(map[string]error)
	runtime.GOMAXPROCS(numworkers)
	fitted := make([]Model, 0)

	var err error
	for k <= kmax {
		//	log.Printf("kmeans started k=%d\n", k)
		current := kmeans(datapoints, centroids, measurer)

		// Split every cluster of the current model.
		//	log.Println("bisect started")
		split := bisect(current.Clusters, dataRows, dataCols, bisectcc, measurer)
		splitCount := len(split.Clusters)
		//	log.Printf("bisect returned %d clusters\n", splitCount)
		fitted = append(fitted, current)

		if splitCount > kmax {
			// Moving k beyond kmax stops the loop on the next check.
			k = splitCount
			continue
		}

		// Draw candidates until one is not already among the centroids.
		var candidate *matrix.DenseMatrix
		for {
			candidate = cc.ChooseCentroids(datapoints, 1)
			if !centroids.RowExists(candidate) {
				break
			}
		}

		centroids, err = centroids.AppendRow(candidate)
		if err != nil {
			failures["ApppendRow"] = err
			break
		}
		k++
	}

	//	log.Println("Finished")
	return fitted, failures
}