func mkWeighted(c *command) { c.assertLeastNArg(4) train := util.Library(c.flags.Arg(0)) in := util.Library(c.flags.Arg(1)) outPath := c.flags.Arg(2) bowPaths := c.flags.Args()[3:] util.AssertOverwritable(outPath, flagOverwrite) // The inverse-document-frequencies of each fragment in the "in" fragment // library. numFrags := in.Size() idfs := make([]float32, numFrags) for i := range idfs { idfs[i] = 1 // pseudocount } // Compute the BOWs for each bower against the training fragment lib. bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet) // Now tally the number of bowers that each fragment occurred in. totalBows := float32(1) // for pseudocount correction for bow := range bows { totalBows += 1 for fragi := 0; fragi < numFrags; fragi++ { if bow.Bow.Freqs[fragi] > 0 { idfs[fragi]++ } } } // Compute the IDF using the frequencies against all the BOWs. for i := range idfs { idfs[i] = float32(math.Log(float64(totalBows / idfs[i]))) } // Finally, wrap the given library as a weighted library and save it. wlib, err := fragbag.NewWeightedTfIdf(in, idfs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), wlib) }
func viewLib(c *command) { c.assertNArg(1) lib := util.Library(c.flags.Arg(0)) fmt.Printf("Name: %s\n", lib.Name()) fmt.Printf("Tag: %s\n", strings.Join(libraryTag(lib), "/")) fmt.Printf("Size: %d\n", lib.Size()) fmt.Printf("Fragment Size: %d\n", lib.FragmentSize()) fmt.Printf("IsStructure: %v\n", fragbag.IsStructure(lib)) fmt.Printf("IsSequence: %v\n", fragbag.IsSequence(lib)) }
func mkPaired(c *command) { c.assertNArg(2) in := util.Library(c.flags.Arg(0)) outPath := c.flags.Arg(1) util.AssertOverwritable(outPath, flagOverwrite) if _, ok := in.(fragbag.WeightedLibrary); ok { util.Fatalf("%s is a weighted library (not allowed)", in.Name()) } name := fmt.Sprintf("paired-%s", in.Name()) if fragbag.IsStructure(in) { var pairs [][]structure.Coords lib := in.(fragbag.StructureLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Atoms(i), lib.Atoms(j) pairs = append(pairs, append(f1, f2...)) } } pairLib, err := fragbag.NewStructureAtoms(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "hmm") { var pairs []*seq.HMM lib := in.(fragbag.SequenceLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM) pairs = append(pairs, seq.HMMCat(f1, f2)) } } pairLib, err := fragbag.NewSequenceHMM(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "profile") { util.Fatalf("Sequence profiles not implemented.") } else { util.Fatalf("Unrecognized fragment library: %s", in.Tag()) } }
func mkBowDb(c *command) { c.assertLeastNArg(3) dbPath := c.flags.Arg(0) flib := util.Library(c.flags.Arg(1)) bowPaths := c.flags.Args()[2:] util.AssertOverwritable(dbPath, flagOverwrite) db, err := bowdb.Create(flib, dbPath) util.Assert(err) bows := util.ProcessBowers(bowPaths, flib, false, flagCpu, util.FlagQuiet) for b := range bows { db.Add(b) } util.Assert(db.Close()) }
func vectors(c *command) { c.assertLeastNArg(2) flib := util.Library(c.flags.Arg(0)) bowPaths := c.flags.Args()[1:] tostrs := func(freqs []float32) []string { strs := make([]string, len(freqs)) for i := range freqs { strs[i] = strconv.FormatFloat(float64(freqs[i]), 'f', -1, 32) } return strs } results := util.ProcessBowers(bowPaths, flib, flagPairdistModels, flagCpu, true) for r := range results { fmt.Printf("%s\t%s\n", r.Id, strings.Join(tostrs(r.Bow.Freqs), "\t")) } }
func pairdist(c *command) { c.assertLeastNArg(2) flib := util.Library(c.flags.Arg(0)) bowPaths := c.flags.Args()[1:] bows := make([]bow.Bowed, 0, 1000) results := util.ProcessBowers(bowPaths, flib, flagPairdistModels, flagCpu, util.FlagQuiet) for r := range results { bows = append(bows, r) } for i := 0; i < len(bows); i++ { b1 := bows[i] for j := i + 1; j < len(bows); j++ { b2 := bows[j] dist := math.Abs(b1.Bow.Cosine(b2.Bow)) fmt.Printf("%s\t%s\t%0.4f\n", b1.Id, b2.Id, dist) } } }
func main() { //start := time.Now() rand.Seed(1) //fmt.Println("Loading query") flagCpu := runtime.NumCPU() fragmentLib := util.Library(json) pdbQueries := make([]string, 1) pdbQueries[0] = pdbQuery bows := util.ProcessBowers(pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet) // for b := range bows { // searchQuery.Add(b) // } db_centers, _ := bowdb.Open(fragmentLibraryLoc) db_centers.ReadAll() //fmt.Println(fmt.Sprintf("\t%d",timer())) //fmt.Println("Unserializing gob") db_slices := dec_gob_ss_db(gobLoc) var m map[string]int m = make(map[string]int) for i, center := range db_centers.Entries { m[center.Id] = i } //fmt.Println(fmt.Sprintf("\t%d",timer())) sortBy := bowdb.SortByEuclid if metric == cosineDist { sortBy = bowdb.SortByCosine } var coarse_search = bowdb.SearchOptions{ Limit: -1, Min: 0.0, Max: (float64(clusterRadius) + float64(maxRadius)), SortBy: sortBy, Order: bowdb.OrderAsc, } //var fine_search = bowdb.SearchOptions{ //Limit: -1, //Min: 0.0, //Max: float64(maxRadius), //SortBy: bowdb.SortByEuclid, // Order: bowdb.OrderAsc, //} //fmt.Println("Computing coarse results") for b := range bows { var coarse_results []bowdb.SearchResult coarse_results = db_centers.Search(coarse_search, b) //coarse_results_time := timer() //fmt.Println(fmt.Sprintf("\t%d",coarse_results_time)) //fmt.Println(fmt.Sprintf("\tCount: %d",len(coarse_results))) // fmt.Println("Computing fine results") var fine_results []bowdb.SearchResult for _, center := range coarse_results { for _, entry := range db_slices[m[center.Id]] { var dist float64 switch metric { case cosineDist: dist = b.Bow.Cosine(entry.Bow) case euclideanDist: dist = b.Bow.Euclid(entry.Bow) } if dist <= float64(maxRadius) { result := newSearchResult(b, entry) fmt.Printf(entry.Id) fmt.Printf(" ") fmt.Printf("%v", dist) fmt.Printf(" ") fine_results = append(fine_results, result) } } } } }