func main() { //start := time.Now() rand.Seed(1) //fmt.Println("Loading query") flagCpu := runtime.NumCPU() fragmentLib := util.Library(json) pdbQueries := make([]string, 1) pdbQueries[0] = pdbQuery bows := util.ProcessBowers(pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet) // for b := range bows { // searchQuery.Add(b) // } db_centers, _ := bowdb.Open(fragmentLibraryLoc) db_centers.ReadAll() //fmt.Println(fmt.Sprintf("\t%d",timer())) //fmt.Println("Unserializing gob") db_slices := dec_gob_ss_db(gobLoc) var m map[string]int m = make(map[string]int) for i, center := range db_centers.Entries { m[center.Id] = i } //fmt.Println(fmt.Sprintf("\t%d",timer())) sortBy := bowdb.SortByEuclid if metric == cosineDist { sortBy = bowdb.SortByCosine } var coarse_search = bowdb.SearchOptions{ Limit: -1, Min: 0.0, Max: (float64(clusterRadius) + float64(maxRadius)), SortBy: sortBy, Order: bowdb.OrderAsc, } //var fine_search = bowdb.SearchOptions{ //Limit: -1, //Min: 0.0, //Max: float64(maxRadius), //SortBy: bowdb.SortByEuclid, // Order: bowdb.OrderAsc, //} //fmt.Println("Computing coarse results") for b := range bows { var coarse_results []bowdb.SearchResult coarse_results = db_centers.Search(coarse_search, b) //coarse_results_time := timer() //fmt.Println(fmt.Sprintf("\t%d",coarse_results_time)) //fmt.Println(fmt.Sprintf("\tCount: %d",len(coarse_results))) // fmt.Println("Computing fine results") var fine_results []bowdb.SearchResult for _, center := range coarse_results { for _, entry := range db_slices[m[center.Id]] { var dist float64 switch metric { case cosineDist: dist = b.Bow.Cosine(entry.Bow) case euclideanDist: dist = b.Bow.Euclid(entry.Bow) } if dist <= float64(maxRadius) { result := newSearchResult(b, entry) fmt.Printf(entry.Id) fmt.Printf(" ") fmt.Printf("%v", dist) fmt.Printf(" ") fine_results = append(fine_results, result) } } } } }
func main() { timer() runtime.GOMAXPROCS(20) db, _ := bowdb.Open(fragmentLibraryLoc) db.ReadAll() //Assert(err, "Could not open BOW database '%s'", path) var kCenters []bow.Bowed fmt.Println(fmt.Sprintf("%d: Generating cluster centers", timer())) if maxRadius > 0 { kCenters = maxRadiusKCenter(db.Entries, metric, maxRadius) numCenters = len(kCenters) } else if centerType == randomSelec { kCenters = randomKCenter(db.Entries, metric, numCenters) } else if centerType == metricApprox { var start_centers []bow.Bowed kCenters = metricKCenter(db.Entries, metric, numCenters, start_centers) } else if centerType == halfhalf { start_centers := randomKCenter(db.Entries, metric, numCenters/2) kCenters = metricKCenter(db.Entries, metric, numCenters-numCenters/2, start_centers) } // for i, center := range kCenters { // fmt.Println(center.Id + fmt.Sprintf(": %d",i)) // } runtime.GOMAXPROCS(20) fmt.Println(fmt.Sprintf("%d: Computing distances from cluster centers", timer())) db_codes := make([]int, len(db.Entries)) distances := make([]float64, len(db.Entries)) sem := make(chan empty, len(db.Entries)) for j, _ := range db.Entries { go func(j int) { distances[j], _, db_codes[j] = distanceFromSet(metric, db.Entries[j], kCenters) //fmt.Println(strconv.Itoa(i) + " " + strconv.Itoa(j) + " " + strconv.FormatFloat(dist,'f',5,32)) }(j) sem <- empty{} } for i := 0; i < len(db.Entries); i++ { <-sem } runtime.GOMAXPROCS(20) fmt.Println(fmt.Sprintf("%d: Writing out centers.cluster.db", timer())) db_centers, _ := bowdb.Create(db.Lib, "centers.cluster.db") for _, center := range kCenters { db_centers.Add(center) } db_centers.Close() fmt.Println(fmt.Sprintf("%d: Opening centers library", timer())) db_centers2, _ := bowdb.Open("centers.cluster.db") db_centers2.ReadAll() var mr map[string]int mr = make(map[string]int) for i, center := range db_centers2.Entries { mr[center.Id] = i } db_slices := make([][]bow.Bowed, numCenters, numCenters) fmt.Println(fmt.Sprintf("%d: Computing individual cluster dbs", timer())) for i := 0; i < len(kCenters); i++ { //curr_cluster, _ := bowdb.Create(db.Lib, kCenters[i].Id + ".cluster.db") //curr_cluster := db_slices[mr[kCenters[i].Id]] for j, entry := range db.Entries { if i == db_codes[j] { db_slices[mr[kCenters[i].Id]] = append(db_slices[mr[kCenters[i].Id]], entry) } } //curr_cluster.Close() } gobLoc := "clusters.gob" fmt.Println(fmt.Sprintf("%d: Serializing gob", timer())) enc_gob_ss_db(db_slices, gobLoc) fmt.Println(fmt.Sprintf("%d: computing cluster radii", timer())) cluster_radii := make([]float64, numCenters) cluster_count := make([]int, numCenters) for j, _ := range db.Entries { if distances[j] > cluster_radii[db_codes[j]] { cluster_radii[db_codes[j]] = distances[j] } cluster_count[db_codes[j]]++ } for j, entry := range kCenters { fmt.Println(entry.Id + fmt.Sprintf("\t%f\t%d", cluster_radii[j], cluster_count[j])) } fmt.Println(fmt.Sprintf("%d: Finished!!", timer())) }
func OpenBowDB(path string) *bowdb.DB { db, err := bowdb.Open(path) Assert(err, "Could not open BOW database '%s'", path) return db }
func main() { rand.Seed(1) db_query, _ := bowdb.Open(searchQuery) db_query.ReadAll() var query bow.Bowed query = db_query.Entries[0] db_centers, _ := bowdb.Open(fragmentLibraryLoc) db_centers.ReadAll() db_slices := dec_gob_ss_db("clusters.gob") var m map[string]int m = make(map[string]int) for i, center := range db_centers.Entries { m[center.Id] = i } /* sortBy := bowdb.SortByEuclid if metric == cosineDist { sortBy = bowdb.SortByCosine } */ db, _ := bowdb.Open(potentialTargetsLoc) db.ReadAll() //repeatNum := 10 // How many times to repeat each run for timing purposes fmt.Println("Radius\tAccelCount\tLongCount\tAccel\tNaive\tSpeedup\tSensitivity\tFineCandidates") for maxR := 0; maxR < 50; maxR = maxR + 1 { maxRadius := 0.0 if metric == cosineDist { maxRadius = float64(maxR) / 100.0 } else { maxRadius = float64(maxR) } coarse_radius := float64(clusterRadius) + float64(maxRadius) accelCount := make([]int, repeatNum) longCount := make([]int, repeatNum) accelTime := make([]int64, repeatNum) naiveTime := make([]int64, repeatNum) fineCandidates := make([]int, repeatNum) for rep := 0; rep < repeatNum; rep++ { timer() var coarse_results []bowdb.SearchResult //coarse_results = db_centers.Search(coarse_search, query) for _, entry := range db_centers.Entries { var dist float64 switch metric { case cosineDist: dist = query.Bow.Cosine(entry.Bow) case euclideanDist: dist = query.Bow.Euclid(entry.Bow) } if dist <= coarse_radius { result := newSearchResult(query, entry) coarse_results = append(coarse_results, result) } } coarse_results_time := timer() var fine_results []bowdb.SearchResult fine_candidates := 0 for _, center := range coarse_results { fine_candidates += len(db_slices[m[center.Id]]) for _, entry := range db_slices[m[center.Id]] { var dist float64 switch metric { case cosineDist: dist = query.Bow.Cosine(entry.Bow) case euclideanDist: dist = query.Bow.Euclid(entry.Bow) } if dist <= float64(maxRadius) { result := newSearchResult(query, entry) fine_results = append(fine_results, result) } } } fine_results_time := timer() var long_results []bowdb.SearchResult //long_results = db.Search(fine_search, query) for _, entry := range db.Entries { var dist float64 switch metric { case cosineDist: dist = query.Bow.Cosine(entry.Bow) case euclideanDist: dist = query.Bow.Euclid(entry.Bow) } if dist <= float64(maxRadius) { result := newSearchResult(query, entry) long_results = append(long_results, result) } } long_results_time := timer() /*if (len(long_results)!=len(fine_results)) { err := "Fine and long searches did not match." fmt.Fprintf(os.Stderr, "error: %v\n", err) fmt.Fprintf(os.Stderr, "Fine: %v\n", len(fine_results)) fmt.Fprintf(os.Stderr, "Long: %v\n", len(long_results)) os.Exit(1) } */ long_count := len(long_results) fine_count := len(fine_results) accel_time := coarse_results_time + fine_results_time accelCount[rep] = fine_count longCount[rep] = long_count accelTime[rep] = accel_time naiveTime[rep] = long_results_time fineCandidates[rep] = fine_candidates } accelCountAvg := averageInt2F64(accelCount) naiveCountAvg := averageInt2F64(longCount) accelTimeAvg := averageInt642F64(accelTime) naiveTimeAvg := averageInt642F64(naiveTime) sensitivity := accelCountAvg / naiveCountAvg speedup := naiveTimeAvg / accelTimeAvg fineSearchCount := averageInt2F64(fineCandidates) fmt.Println(fmt.Sprintf("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f", maxRadius, accelCountAvg, naiveCountAvg, accelTimeAvg, naiveTimeAvg, speedup, sensitivity, fineSearchCount)) } }