Esempio n. 1
0
// mkBowDb creates a BOW database from the command's positional arguments.
//
// Argument 0 is the path of the database to create, argument 1 identifies the
// fragment library (resolved through util.Library), and every remaining
// argument is handed to util.ProcessBowers as an input path. Each item that
// ProcessBowers yields is added to the database, which is closed at the end.
// Failures from creating or closing the database go through util.Assert.
func mkBowDb(c *command) {
	// Presumably enforces a minimum of three positional arguments — confirm
	// against the definition of assertLeastNArg.
	c.assertLeastNArg(3)

	var (
		outPath = c.flags.Arg(0)
		lib     = util.Library(c.flags.Arg(1))
		inputs  = c.flags.Args()[2:]
	)

	// Check the destination before anything is created on disk.
	util.AssertOverwritable(outPath, flagOverwrite)

	out, err := bowdb.Create(lib, outPath)
	util.Assert(err)

	// Feed every computed BOW straight into the new database.
	for bowed := range util.ProcessBowers(inputs, lib, false, flagCpu, util.FlagQuiet) {
		out.Add(bowed)
	}

	util.Assert(out.Close())
}
Esempio n. 2
0
// main clusters the entries of a BOW database around a set of centers and
// writes the results to disk.
//
// Steps, in order:
//  1. Open the database at fragmentLibraryLoc and load its entries.
//  2. Pick cluster centers: by maximum radius when maxRadius > 0 (which also
//     overwrites numCenters with the number of centers found), otherwise by
//     the strategy selected through centerType.
//  3. Concurrently compute, for every entry, its distance to the center set
//     and the index of the center it is assigned to (via distanceFromSet).
//  4. Write the centers to "centers.cluster.db", reopen that file and map
//     each center Id to its position in the reopened database.
//  5. Group the entries per cluster and serialize the groups to
//     "clusters.gob".
//  6. Print, per center, its Id, the largest distance recorded for an entry
//     assigned to it, and the number of entries assigned to it.
//
// Failures to open, create or close a database abort the program with a
// panic carrying the underlying error.
func main() {
	timer()
	runtime.GOMAXPROCS(20)

	db, err := bowdb.Open(fragmentLibraryLoc)
	if err != nil {
		panic(fmt.Sprintf("Could not open BOW database '%v': %v", fragmentLibraryLoc, err))
	}
	// NOTE(review): any result of ReadAll is discarded, as in the original;
	// its signature is not visible here — confirm whether it reports an error.
	db.ReadAll()

	var kCenters []bow.Bowed
	fmt.Printf("%d: Generating cluster centers\n", timer())
	switch {
	case maxRadius > 0:
		kCenters = maxRadiusKCenter(db.Entries, metric, maxRadius)
		numCenters = len(kCenters)
	case centerType == randomSelec:
		kCenters = randomKCenter(db.Entries, metric, numCenters)
	case centerType == metricApprox:
		var startCenters []bow.Bowed
		kCenters = metricKCenter(db.Entries, metric, numCenters, startCenters)
	case centerType == halfhalf:
		// Seed with a random half, then request the remaining centers from
		// the metric-based selection.
		startCenters := randomKCenter(db.Entries, metric, numCenters/2)
		kCenters = metricKCenter(db.Entries, metric, numCenters-numCenters/2, startCenters)
	}

	// NOTE(review): GOMAXPROCS is re-applied here and below exactly as the
	// original did; presumably redundant unless a helper changes it — confirm.
	runtime.GOMAXPROCS(20)
	fmt.Printf("%d: Computing distances from cluster centers\n", timer())
	dbCodes := make([]int, len(db.Entries))
	distances := make([]float64, len(db.Entries))
	// The channel is buffered to the number of workers so that no worker
	// blocks when signalling completion.
	done := make(chan empty, len(db.Entries))
	for j := range db.Entries {
		go func(j int) {
			distances[j], _, dbCodes[j] = distanceFromSet(metric, db.Entries[j], kCenters)
			// Signal only after the results have been stored; each worker
			// writes to its own index, so no further locking is needed.
			done <- empty{}
		}(j)
	}
	// Wait for every worker before reading distances or dbCodes.
	for i := 0; i < len(db.Entries); i++ {
		<-done
	}
	runtime.GOMAXPROCS(20)

	fmt.Printf("%d: Writing out centers.cluster.db\n", timer())
	dbCenters, err := bowdb.Create(db.Lib, "centers.cluster.db")
	if err != nil {
		panic(fmt.Sprintf("could not create centers.cluster.db: %v", err))
	}
	for _, center := range kCenters {
		dbCenters.Add(center)
	}
	if err := dbCenters.Close(); err != nil {
		panic(fmt.Sprintf("could not close centers.cluster.db: %v", err))
	}

	fmt.Printf("%d: Opening centers library\n", timer())
	dbCenters2, err := bowdb.Open("centers.cluster.db")
	if err != nil {
		panic(fmt.Sprintf("could not open centers.cluster.db: %v", err))
	}
	dbCenters2.ReadAll()
	// mr maps a center's Id to its index in the reopened centers database.
	mr := make(map[string]int, len(dbCenters2.Entries))
	for i, center := range dbCenters2.Entries {
		mr[center.Id] = i
	}

	dbSlices := make([][]bow.Bowed, numCenters)

	fmt.Printf("%d: Computing individual cluster dbs\n", timer())
	for i := range kCenters {
		// The destination slot depends only on the center, so look it up once
		// instead of on every matching entry.
		slot := mr[kCenters[i].Id]
		for j, entry := range db.Entries {
			if dbCodes[j] == i {
				dbSlices[slot] = append(dbSlices[slot], entry)
			}
		}
	}

	gobLoc := "clusters.gob"
	fmt.Printf("%d: Serializing gob\n", timer())
	enc_gob_ss_db(dbSlices, gobLoc)

	fmt.Printf("%d: computing cluster radii\n", timer())
	clusterRadii := make([]float64, numCenters)
	clusterCount := make([]int, numCenters)
	for j := range db.Entries {
		code := dbCodes[j]
		if distances[j] > clusterRadii[code] {
			clusterRadii[code] = distances[j]
		}
		clusterCount[code]++
	}

	for j, entry := range kCenters {
		fmt.Printf("%s\t%f\t%d\n", entry.Id, clusterRadii[j], clusterCount[j])
	}
	fmt.Printf("%d: Finished!!\n", timer())
}