func search(c *command) { c.assertLeastNArg(2) // Some search options don't translate directly to command line parameters // specified by the flag package. if flagSearchDesc { flagSearchOpts.Order = bowdb.OrderDesc } switch flagSearchSort { case "cosine": flagSearchOpts.SortBy = bowdb.SortByCosine case "euclid": flagSearchOpts.SortBy = bowdb.SortByEuclid default: util.Fatalf("Unknown sort field '%s'.", flagSearchSort) } db := util.OpenBowDB(c.flags.Arg(0)) bowPaths := c.flags.Args()[1:] _, err := db.ReadAll() util.Assert(err, "Could not read BOW database entries") // always hide the progress bar here. bows := util.ProcessBowers(bowPaths, db.Lib, false, flagCpu, true) out, outDone := outputter() // launch goroutines to search queries in parallel wgSearch := new(sync.WaitGroup) for i := 0; i < flagCpu; i++ { wgSearch.Add(1) go func() { defer wgSearch.Done() for b := range bows { sr := db.Search(flagSearchOpts, b) out <- searchResult{b, sr} } }() } wgSearch.Wait() close(out) <-outDone util.Assert(db.Close()) }
func mkBowDb(c *command) { c.assertLeastNArg(3) dbPath := c.flags.Arg(0) flib := util.Library(c.flags.Arg(1)) bowPaths := c.flags.Args()[2:] util.AssertOverwritable(dbPath, flagOverwrite) db, err := bowdb.Create(flib, dbPath) util.Assert(err) bows := util.ProcessBowers(bowPaths, flib, false, flagCpu, util.FlagQuiet) for b := range bows { db.Add(b) } util.Assert(db.Close()) }
func vectors(c *command) { c.assertLeastNArg(2) flib := util.Library(c.flags.Arg(0)) bowPaths := c.flags.Args()[1:] tostrs := func(freqs []float32) []string { strs := make([]string, len(freqs)) for i := range freqs { strs[i] = strconv.FormatFloat(float64(freqs[i]), 'f', -1, 32) } return strs } results := util.ProcessBowers(bowPaths, flib, flagPairdistModels, flagCpu, true) for r := range results { fmt.Printf("%s\t%s\n", r.Id, strings.Join(tostrs(r.Bow.Freqs), "\t")) } }
func mkWeighted(c *command) { c.assertLeastNArg(4) train := util.Library(c.flags.Arg(0)) in := util.Library(c.flags.Arg(1)) outPath := c.flags.Arg(2) bowPaths := c.flags.Args()[3:] util.AssertOverwritable(outPath, flagOverwrite) // The inverse-document-frequencies of each fragment in the "in" fragment // library. numFrags := in.Size() idfs := make([]float32, numFrags) for i := range idfs { idfs[i] = 1 // pseudocount } // Compute the BOWs for each bower against the training fragment lib. bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet) // Now tally the number of bowers that each fragment occurred in. totalBows := float32(1) // for pseudocount correction for bow := range bows { totalBows += 1 for fragi := 0; fragi < numFrags; fragi++ { if bow.Bow.Freqs[fragi] > 0 { idfs[fragi]++ } } } // Compute the IDF using the frequencies against all the BOWs. for i := range idfs { idfs[i] = float32(math.Log(float64(totalBows / idfs[i]))) } // Finally, wrap the given library as a weighted library and save it. wlib, err := fragbag.NewWeightedTfIdf(in, idfs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), wlib) }
func pairdist(c *command) { c.assertLeastNArg(2) flib := util.Library(c.flags.Arg(0)) bowPaths := c.flags.Args()[1:] bows := make([]bow.Bowed, 0, 1000) results := util.ProcessBowers(bowPaths, flib, flagPairdistModels, flagCpu, util.FlagQuiet) for r := range results { bows = append(bows, r) } for i := 0; i < len(bows); i++ { b1 := bows[i] for j := i + 1; j < len(bows); j++ { b2 := bows[j] dist := math.Abs(b1.Bow.Cosine(b2.Bow)) fmt.Printf("%s\t%s\t%0.4f\n", b1.Id, b2.Id, dist) } } }
func main() { //start := time.Now() rand.Seed(1) //fmt.Println("Loading query") flagCpu := runtime.NumCPU() fragmentLib := util.Library(json) pdbQueries := make([]string, 1) pdbQueries[0] = pdbQuery bows := util.ProcessBowers(pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet) // for b := range bows { // searchQuery.Add(b) // } db_centers, _ := bowdb.Open(fragmentLibraryLoc) db_centers.ReadAll() //fmt.Println(fmt.Sprintf("\t%d",timer())) //fmt.Println("Unserializing gob") db_slices := dec_gob_ss_db(gobLoc) var m map[string]int m = make(map[string]int) for i, center := range db_centers.Entries { m[center.Id] = i } //fmt.Println(fmt.Sprintf("\t%d",timer())) sortBy := bowdb.SortByEuclid if metric == cosineDist { sortBy = bowdb.SortByCosine } var coarse_search = bowdb.SearchOptions{ Limit: -1, Min: 0.0, Max: (float64(clusterRadius) + float64(maxRadius)), SortBy: sortBy, Order: bowdb.OrderAsc, } //var fine_search = bowdb.SearchOptions{ //Limit: -1, //Min: 0.0, //Max: float64(maxRadius), //SortBy: bowdb.SortByEuclid, // Order: bowdb.OrderAsc, //} //fmt.Println("Computing coarse results") for b := range bows { var coarse_results []bowdb.SearchResult coarse_results = db_centers.Search(coarse_search, b) //coarse_results_time := timer() //fmt.Println(fmt.Sprintf("\t%d",coarse_results_time)) //fmt.Println(fmt.Sprintf("\tCount: %d",len(coarse_results))) // fmt.Println("Computing fine results") var fine_results []bowdb.SearchResult for _, center := range coarse_results { for _, entry := range db_slices[m[center.Id]] { var dist float64 switch metric { case cosineDist: dist = b.Bow.Cosine(entry.Bow) case euclideanDist: dist = b.Bow.Euclid(entry.Bow) } if dist <= float64(maxRadius) { result := newSearchResult(b, entry) fmt.Printf(entry.Id) fmt.Printf(" ") fmt.Printf("%v", dist) fmt.Printf(" ") fine_results = append(fine_results, result) } } } } }