예제 #1
0
파일: main.go 프로젝트: ndaniels/esfragbag
// main runs a cluster-accelerated range search for a single PDB query and
// prints every hit to standard output as "<id> <distance> " (no newline).
//
// The query named by pdbQuery is turned into BOWs via util.ProcessBowers.
// Each BOW is first searched against the cluster centers stored in the
// database at fragmentLibraryLoc (coarse search, radius clusterRadius +
// maxRadius) and then compared against the members of every matching
// cluster, which are obtained from dec_gob_ss_db(gobLoc) (fine search,
// radius maxRadius). All configuration values (json, pdbQuery,
// fragmentLibraryLoc, gobLoc, metric, clusterRadius, maxRadius) are
// identifiers declared outside this function.
func main() {
	rand.Seed(1)

	flagCpu := runtime.NumCPU()
	fragmentLib := util.Library(json)
	pdbQueries := []string{pdbQuery}
	bows := util.ProcessBowers(pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet)

	dbCenters, err := bowdb.Open(fragmentLibraryLoc)
	if err != nil {
		// Stop with the path and the cause instead of continuing with a
		// database handle that could not be opened.
		panic(fmt.Sprintf("could not open BOW database %q: %v", fragmentLibraryLoc, err))
	}
	dbCenters.ReadAll()

	dbSlices := dec_gob_ss_db(gobLoc)

	// Position of every center inside dbCenters.Entries, keyed by Id. The
	// same position is used below to index dbSlices.
	centerIndex := make(map[string]int, len(dbCenters.Entries))
	for i, center := range dbCenters.Entries {
		centerIndex[center.Id] = i
	}

	sortBy := bowdb.SortByEuclid
	if metric == cosineDist {
		sortBy = bowdb.SortByCosine
	}

	coarseSearch := bowdb.SearchOptions{
		Limit:  -1,
		Min:    0.0,
		Max:    float64(clusterRadius) + float64(maxRadius),
		SortBy: sortBy,
		Order:  bowdb.OrderAsc,
	}

	for b := range bows {
		// Coarse step: centers within clusterRadius + maxRadius of the query.
		coarseResults := dbCenters.Search(coarseSearch, b)

		// Fine step: members of the selected clusters within maxRadius.
		// fineResults is collected but not consumed further in this function.
		var fineResults []bowdb.SearchResult
		for _, center := range coarseResults {
			for _, entry := range dbSlices[centerIndex[center.Id]] {
				// dist stays 0 when metric matches neither case below.
				var dist float64
				switch metric {
				case cosineDist:
					dist = b.Bow.Cosine(entry.Bow)
				case euclideanDist:
					dist = b.Bow.Euclid(entry.Bow)
				}
				if dist <= float64(maxRadius) {
					result := newSearchResult(b, entry)
					// The Id is passed as an argument, never as the format
					// string, so a '%' inside an Id is printed verbatim.
					fmt.Printf("%s %v ", entry.Id, dist)
					fineResults = append(fineResults, result)
				}
			}
		}
	}
}
예제 #2
0
파일: main.go 프로젝트: ndaniels/esfragbag
// main partitions the BOW database at fragmentLibraryLoc into clusters and
// stores the artifacts used by the accelerated search.
//
// Steps:
//  1. Choose cluster centers (by maxRadius when it is positive, otherwise
//     according to centerType and numCenters).
//  2. Assign every database entry to a center with distanceFromSet,
//     one goroutine per entry.
//  3. Store the centers in a BOW database named "centers.cluster.db" and
//     read it back to learn the position of each center in that file.
//  4. Group the entries per center, indexed by that position, and hand the
//     result to enc_gob_ss_db together with the path "clusters.gob".
//  5. Print "<center id>\t<radius>\t<member count>" for every center.
//
// Progress lines are prefixed with the value returned by timer().
// fragmentLibraryLoc, metric, maxRadius, centerType and numCenters are
// identifiers declared outside this function; numCenters is overwritten when
// centers are chosen by radius.
func main() {
	timer()
	// NOTE(review): the thread count is hard-coded to 20.
	runtime.GOMAXPROCS(20)

	db, err := bowdb.Open(fragmentLibraryLoc)
	if err != nil {
		// Stop with the path and the cause instead of continuing with a
		// database handle that could not be opened.
		panic(fmt.Sprintf("could not open BOW database %q: %v", fragmentLibraryLoc, err))
	}
	db.ReadAll()

	var kCenters []bow.Bowed
	fmt.Printf("%d: Generating cluster centers\n", timer())
	switch {
	case maxRadius > 0:
		kCenters = maxRadiusKCenter(db.Entries, metric, maxRadius)
		numCenters = len(kCenters)
	case centerType == randomSelec:
		kCenters = randomKCenter(db.Entries, metric, numCenters)
	case centerType == metricApprox:
		var startCenters []bow.Bowed
		kCenters = metricKCenter(db.Entries, metric, numCenters, startCenters)
	case centerType == halfhalf:
		startCenters := randomKCenter(db.Entries, metric, numCenters/2)
		kCenters = metricKCenter(db.Entries, metric, numCenters-numCenters/2, startCenters)
	}

	fmt.Printf("%d: Computing distances from cluster centers\n", timer())
	dbCodes := make([]int, len(db.Entries))
	distances := make([]float64, len(db.Entries))
	done := make(chan empty, len(db.Entries))
	for j := range db.Entries {
		go func(j int) {
			distances[j], _, dbCodes[j] = distanceFromSet(metric, db.Entries[j], kCenters)
			// Signal from inside the goroutine, after the results are
			// stored. Sending from the launching loop would let the wait
			// loop below finish before the workers have written anything.
			done <- empty{}
		}(j)
	}
	for range db.Entries {
		<-done
	}

	fmt.Printf("%d: Writing out centers.cluster.db\n", timer())
	dbCenters, err := bowdb.Create(db.Lib, "centers.cluster.db")
	if err != nil {
		panic(fmt.Sprintf("could not create BOW database %q: %v", "centers.cluster.db", err))
	}
	for _, center := range kCenters {
		dbCenters.Add(center)
	}
	dbCenters.Close()

	fmt.Printf("%d: Opening centers library\n", timer())
	dbCentersRead, err := bowdb.Open("centers.cluster.db")
	if err != nil {
		panic(fmt.Sprintf("could not open BOW database %q: %v", "centers.cluster.db", err))
	}
	dbCentersRead.ReadAll()

	// Position of every center in the re-read centers database, keyed by Id.
	centerIndex := make(map[string]int, len(dbCentersRead.Entries))
	for i, center := range dbCentersRead.Entries {
		centerIndex[center.Id] = i
	}

	dbSlices := make([][]bow.Bowed, numCenters)

	fmt.Printf("%d: Computing individual cluster dbs\n", timer())
	// One pass over the entries: each entry goes to the slot of its assigned
	// center. Entries keep their database order inside every cluster.
	for j, entry := range db.Entries {
		code := dbCodes[j]
		if code < 0 || code >= len(kCenters) {
			// No matching center index; such entries belong to no cluster.
			continue
		}
		slot := centerIndex[kCenters[code].Id]
		dbSlices[slot] = append(dbSlices[slot], entry)
	}

	gobLoc := "clusters.gob"
	fmt.Printf("%d: Serializing gob\n", timer())
	enc_gob_ss_db(dbSlices, gobLoc)

	fmt.Printf("%d: computing cluster radii\n", timer())
	// The radius of a cluster is the largest distance of any member from its
	// center; the count is the number of members.
	clusterRadii := make([]float64, numCenters)
	clusterCount := make([]int, numCenters)
	for j := range db.Entries {
		code := dbCodes[j]
		if distances[j] > clusterRadii[code] {
			clusterRadii[code] = distances[j]
		}
		clusterCount[code]++
	}

	for j, center := range kCenters {
		fmt.Printf("%s\t%f\t%d\n", center.Id, clusterRadii[j], clusterCount[j])
	}
	fmt.Printf("%d: Finished!!\n", timer())
}
예제 #3
0
파일: resources.go 프로젝트: ndaniels/tools
// OpenBowDB opens the BOW database located at path and returns its handle.
//
// The error reported by bowdb.Open is handed to Assert together with a
// message that names path; how a non-nil error is treated is decided by
// Assert (NOTE(review): presumably it aborts the program — confirm).
func OpenBowDB(path string) *bowdb.DB {
	handle, openErr := bowdb.Open(path)
	Assert(openErr, "Could not open BOW database '%s'", path)
	return handle
}
예제 #4
0
파일: main.go 프로젝트: ndaniels/esfragbag
// main benchmarks the cluster-accelerated range search against a naive linear
// scan, using the first entry of the database at searchQuery as the query.
//
// For maxR = 0..49 the search radius is maxR (or maxR/100 when metric is
// cosineDist). Every radius is measured repeatNum times and one tab-separated
// row of averages is printed:
//
//	Radius, AccelCount, LongCount, Accel, Naive, Speedup, Sensitivity,
//	FineCandidates
//
// Each repetition runs three phases, each closed by a call to timer():
// a scan of the cluster centers with radius clusterRadius + radius, a scan of
// the members of the selected clusters, and a scan of the whole database at
// potentialTargetsLoc. searchQuery, fragmentLibraryLoc, potentialTargetsLoc,
// metric, clusterRadius and repeatNum are identifiers declared outside this
// function.
func main() {
	rand.Seed(1)

	dbQuery, err := bowdb.Open(searchQuery)
	if err != nil {
		// Stop with the path and the cause instead of continuing with a
		// database handle that could not be opened.
		panic(fmt.Sprintf("could not open BOW database %q: %v", searchQuery, err))
	}
	dbQuery.ReadAll()
	if len(dbQuery.Entries) == 0 {
		panic(fmt.Sprintf("BOW database %q contains no query entry", searchQuery))
	}
	query := dbQuery.Entries[0]

	dbCenters, err := bowdb.Open(fragmentLibraryLoc)
	if err != nil {
		panic(fmt.Sprintf("could not open BOW database %q: %v", fragmentLibraryLoc, err))
	}
	dbCenters.ReadAll()

	dbSlices := dec_gob_ss_db("clusters.gob")

	// Position of every center inside dbCenters.Entries, keyed by Id. The
	// same position is used below to index dbSlices.
	centerIndex := make(map[string]int, len(dbCenters.Entries))
	for i, center := range dbCenters.Entries {
		centerIndex[center.Id] = i
	}

	db, err := bowdb.Open(potentialTargetsLoc)
	if err != nil {
		panic(fmt.Sprintf("could not open BOW database %q: %v", potentialTargetsLoc, err))
	}
	db.ReadAll()

	fmt.Println("Radius\tAccelCount\tLongCount\tAccel\tNaive\tSpeedup\tSensitivity\tFineCandidates")
	for maxR := 0; maxR < 50; maxR++ {
		// Cosine radii are swept in steps of 0.01, Euclidean radii in steps of 1.
		maxRadius := float64(maxR)
		if metric == cosineDist {
			maxRadius /= 100.0
		}
		coarseRadius := float64(clusterRadius) + maxRadius

		accelCount := make([]int, repeatNum)
		longCount := make([]int, repeatNum)
		accelTime := make([]int64, repeatNum)
		naiveTime := make([]int64, repeatNum)
		fineCandidates := make([]int, repeatNum)

		for rep := 0; rep < repeatNum; rep++ {
			timer()

			// Phase 1: centers within coarseRadius of the query.
			var coarseResults []bowdb.SearchResult
			for _, entry := range dbCenters.Entries {
				// dist stays 0 when metric matches neither case below.
				var dist float64
				switch metric {
				case cosineDist:
					dist = query.Bow.Cosine(entry.Bow)
				case euclideanDist:
					dist = query.Bow.Euclid(entry.Bow)
				}
				if dist <= coarseRadius {
					result := newSearchResult(query, entry)
					coarseResults = append(coarseResults, result)
				}
			}
			coarseResultsTime := timer()

			// Phase 2: members of the selected clusters within maxRadius.
			var fineResults []bowdb.SearchResult
			fineCandidateCount := 0
			for _, center := range coarseResults {
				members := dbSlices[centerIndex[center.Id]]
				fineCandidateCount += len(members)
				for _, entry := range members {
					var dist float64
					switch metric {
					case cosineDist:
						dist = query.Bow.Cosine(entry.Bow)
					case euclideanDist:
						dist = query.Bow.Euclid(entry.Bow)
					}
					if dist <= maxRadius {
						result := newSearchResult(query, entry)
						fineResults = append(fineResults, result)
					}
				}
			}
			fineResultsTime := timer()

			// Phase 3: naive scan over the complete target database.
			var longResults []bowdb.SearchResult
			for _, entry := range db.Entries {
				var dist float64
				switch metric {
				case cosineDist:
					dist = query.Bow.Cosine(entry.Bow)
				case euclideanDist:
					dist = query.Bow.Euclid(entry.Bow)
				}
				if dist <= maxRadius {
					result := newSearchResult(query, entry)
					longResults = append(longResults, result)
				}
			}
			longResultsTime := timer()

			accelCount[rep] = len(fineResults)
			longCount[rep] = len(longResults)
			// The accelerated search pays for both the coarse and the fine phase.
			accelTime[rep] = coarseResultsTime + fineResultsTime
			naiveTime[rep] = longResultsTime
			fineCandidates[rep] = fineCandidateCount
		}

		accelCountAvg := averageInt2F64(accelCount)
		naiveCountAvg := averageInt2F64(longCount)
		accelTimeAvg := averageInt642F64(accelTime)
		naiveTimeAvg := averageInt642F64(naiveTime)
		// Fraction of the naive hits that the accelerated search also found.
		// NOTE(review): not guarded against a zero naive count — confirm the
		// consumer of this table tolerates the resulting non-finite value.
		sensitivity := accelCountAvg / naiveCountAvg
		speedup := naiveTimeAvg / accelTimeAvg
		fineSearchCount := averageInt2F64(fineCandidates)
		fmt.Printf("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", maxRadius, accelCountAvg, naiveCountAvg, accelTimeAvg, naiveTimeAvg, speedup, sensitivity, fineSearchCount)
	}
}