func main() { if len(util.FlagCpuProf) > 0 { f := util.CreateFile(util.FlagCpuProf) pprof.StartCPUProfile(f) defer f.Close() defer pprof.StopCPUProfile() } if len(flagGobIt) > 0 { astralDir := util.Arg(0) dists := readAlignmentDists(astralDir) enc := gob.NewEncoder(util.CreateFile(flagGobIt)) util.Assert(enc.Encode(dists), "Could not GOB encode distances") return } var dists *intern.Table if util.IsDir(util.Arg(0)) { dists = readAlignmentDists(util.Arg(0)) } else { dec := gob.NewDecoder(util.OpenFile(util.Arg(0))) util.Assert(dec.Decode(&dists), "Could not GOB decode distances") } treeFile := util.Arg(1) outPath := util.Arg(2) treeReader := newick.NewReader(util.OpenFile(treeFile)) tree, err := treeReader.ReadTree() util.Assert(err, "Could not read newick tree") csvw := csv.NewWriter(util.CreateFile(outPath)) clusters := treeClusters(flagThreshold, dists, tree) util.Assert(csvw.WriteAll(clusters)) }
func main() { var f io.Reader var err error f = util.OpenFile(flag.Arg(0)) if strings.HasSuffix(flag.Arg(0), ".gz") { f, err = gzip.NewReader(f) util.Assert(err) } cifEntry, err := pdbx.Read(f) util.Assert(err, "Could not read PDBx/mmCIF file") fasEntries := make([]seq.Sequence, 0, 5) for _, ent := range cifEntry.Entities { for _, chain := range ent.Chains { if !isChainUsable(chain) || len(ent.Seq) == 0 { continue } fasEntry := seq.Sequence{ Name: chainHeader(chain), Residues: ent.Seq, } fasEntries = append(fasEntries, fasEntry) } } if len(fasEntries) == 0 { util.Fatalf("Could not find any chains with amino acids.") } var fasOut io.Writer if flag.NArg() == 1 { fasOut = os.Stdout } else { if len(flagSplit) > 0 { util.Fatalf("The '--split' option is incompatible with a single " + "output file.") } fasOut = util.CreateFile(util.Arg(1)) } if len(flagSplit) == 0 { util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries), "Could not write FASTA file '%s'", fasOut) } else { for _, entry := range fasEntries { fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name)) out := util.CreateFile(fp) w := fasta.NewWriter(out) util.Assert(w.Write(entry), "Could not write to '%s'", fp) util.Assert(w.Flush(), "Could not write to '%s'", fp) } } }
func mkPaired(c *command) { c.assertNArg(2) in := util.Library(c.flags.Arg(0)) outPath := c.flags.Arg(1) util.AssertOverwritable(outPath, flagOverwrite) if _, ok := in.(fragbag.WeightedLibrary); ok { util.Fatalf("%s is a weighted library (not allowed)", in.Name()) } name := fmt.Sprintf("paired-%s", in.Name()) if fragbag.IsStructure(in) { var pairs [][]structure.Coords lib := in.(fragbag.StructureLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Atoms(i), lib.Atoms(j) pairs = append(pairs, append(f1, f2...)) } } pairLib, err := fragbag.NewStructureAtoms(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "hmm") { var pairs []*seq.HMM lib := in.(fragbag.SequenceLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM) pairs = append(pairs, seq.HMMCat(f1, f2)) } } pairLib, err := fragbag.NewSequenceHMM(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "profile") { util.Fatalf("Sequence profiles not implemented.") } else { util.Fatalf("Unrecognized fragment library: %s", in.Tag()) } }
func main() { outDir := util.Arg(0) fasInps := util.Args()[1:] util.Assert(os.MkdirAll(outDir, 0777)) fastaChan := make(chan string) wg := new(sync.WaitGroup) for i := 0; i < max(1, runtime.GOMAXPROCS(0)); i++ { go func() { wg.Add(1) for fasta := range fastaChan { util.Verbosef("Computing map for '%s'...", fasta) fmap := util.GetFmap(fasta) outF := path.Join(outDir, fmt.Sprintf("%s.fmap", fmap.Name)) util.FmapWrite(util.CreateFile(outF), fmap) } wg.Done() }() } for _, fasta := range fasInps { fastaChan <- fasta } close(fastaChan) wg.Wait() }
func main() { fasInp := util.Arg(0) fmapOut := util.Arg(1) fmap := util.GetFmap(fasInp) util.FmapWrite(util.CreateFile(fmapOut), fmap) }
func main() { var cmd string var help bool if len(os.Args) < 2 { usage() } else if strings.TrimLeft(os.Args[1], "-") == "help" { if len(os.Args) < 3 { usage() } else { cmd = os.Args[2] help = true } } else { cmd = os.Args[1] } for _, c := range commands { if c.name == cmd { c.setCommonFlags() if c.addFlags != nil { c.addFlags(c) } if help { c.showHelp() } else { c.flags.Usage = c.showUsage c.flags.Parse(os.Args[2:]) if flagCpu < 1 { flagCpu = 1 } runtime.GOMAXPROCS(flagCpu) if len(flagCpuProfile) > 0 { f := util.CreateFile(flagCpuProfile) pprof.StartCPUProfile(f) defer f.Close() defer pprof.StopCPUProfile() } c.run(c) return } } } log.Printf("Unknown command '%s'. Run 'flib help' for a list of "+ "available commands.", cmd) os.Exit(1) }
func main() { inFasta := util.Arg(0) outHHM := util.Arg(1) hhblits := hhsuite.HHBlitsDefault hhmake := hhsuite.HHMakePseudo hhblits.Verbose = !flagQuiet hhmake.Verbose = !flagQuiet HHM, err := hhsuite.BuildHHM( hhblits, hhmake, util.FlagSeqDB, inFasta) util.Assert(err, "Error building HHM") util.Assert(hmm.WriteHHM(util.CreateFile(outHHM), HHM), "Error writing HHM '%s'", outHHM) }
func mkWeighted(c *command) { c.assertLeastNArg(4) train := util.Library(c.flags.Arg(0)) in := util.Library(c.flags.Arg(1)) outPath := c.flags.Arg(2) bowPaths := c.flags.Args()[3:] util.AssertOverwritable(outPath, flagOverwrite) // The inverse-document-frequencies of each fragment in the "in" fragment // library. numFrags := in.Size() idfs := make([]float32, numFrags) for i := range idfs { idfs[i] = 1 // pseudocount } // Compute the BOWs for each bower against the training fragment lib. bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet) // Now tally the number of bowers that each fragment occurred in. totalBows := float32(1) // for pseudocount correction for bow := range bows { totalBows += 1 for fragi := 0; fragi < numFrags; fragi++ { if bow.Bow.Freqs[fragi] > 0 { idfs[fragi]++ } } } // Compute the IDF using the frequencies against all the BOWs. for i := range idfs { idfs[i] = float32(math.Log(float64(totalBows / idfs[i]))) } // Finally, wrap the given library as a weighted library and save it. wlib, err := fragbag.NewWeightedTfIdf(in, idfs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), wlib) }
func main() { a3mPath := util.Arg(0) fa3m := util.OpenFile(a3mPath) freader := fasta.NewReader(fa3m) freader.TrustSequences = true seqs, err := freader.ReadAll() util.Assert(err, "Could not read fasta format '%s'", a3mPath) util.Assert(fa3m.Close()) w := util.CreateFile(a3mPath) fwriter := fasta.NewWriter(w) fwriter.Columns = 0 for _, seq := range seqs { if len(seq.Residues) > 0 { util.Assert(fwriter.Write(seq)) } } util.Assert(fwriter.Flush()) util.Assert(w.Close()) }
func main() { libPath := util.Arg(0) chain := util.Arg(1) pdbEntryPath := util.Arg(2) bowOut := util.Arg(3) lib := util.StructureLibrary(libPath) entry := util.PDBRead(pdbEntryPath) thechain := entry.Chain(chain[0]) if thechain == nil || !thechain.IsProtein() { util.Fatalf("Could not find chain with identifier '%c'.", chain[0]) } bow := bow.BowerFromChain(thechain).StructureBow(lib) if bowOut == "--" { fmt.Println(bow) } else { util.BowWrite(util.CreateFile(bowOut), bow) } }
func main() { rfasta := util.OpenFasta(util.Arg(0)) dir := util.Arg(1) util.Assert(os.MkdirAll(dir, 0777)) fr := fasta.NewReader(rfasta) for { s, err := fr.Read() if err != nil { if err == io.EOF { break } util.Assert(err) } s.Name = strings.Fields(s.Name)[0] fw := util.CreateFile(path.Join(dir, s.Name+".fasta")) w := fasta.NewWriter(fw) util.Assert(w.Write(s)) util.Assert(w.Flush()) util.Assert(fw.Close()) } }
func mkSeqProfile(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Initialize a frequency and null profile for each structural fragment. var freqProfiles []*seq.FrequencyProfile var fpChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { fp := seq.NewFrequencyProfile(structLib.FragmentSize()) freqProfiles = append(freqProfiles, fp) fpChans = append(fpChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. nullChan, nullProfile := addToNull() for i := 0; i < structLib.Size(); i++ { addToProfile(fpChans[i], freqProfiles[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nullChan, fpChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. close(nullChan) for i := 0; i < structLib.Size(); i++ { close(fpChans[i]) } wgSeqFragments.Wait() // Finally, add the sequence fragments to a new sequence fragment // library and save. profs := make([]*seq.Profile, structLib.Size()) for i := 0; i < structLib.Size(); i++ { profs[i] = freqProfiles[i].Profile(nullProfile) } lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }
func main() { lib := util.StructureLibrary(util.Arg(0)) fmap := util.FmapRead(util.Arg(1)) util.BowWrite(util.CreateFile(util.Arg(2)), fmap.StructureBow(lib)) }
func main() { pdbEntry := util.PDBRead(flag.Arg(0)) fasEntries := make([]seq.Sequence, 0, 5) if !flagSeparateChains { var fasEntry seq.Sequence if len(pdbEntry.Chains) == 1 { fasEntry.Name = chainHeader(pdbEntry.OneChain()) } else { fasEntry.Name = fmt.Sprintf("%s", strings.ToLower(pdbEntry.IdCode)) } seq := make([]seq.Residue, 0, 100) for _, chain := range pdbEntry.Chains { if isChainUsable(chain) { seq = append(seq, chain.Sequence...) } } fasEntry.Residues = seq if len(fasEntry.Residues) == 0 { util.Fatalf("Could not find any amino acids.") } fasEntries = append(fasEntries, fasEntry) } else { for _, chain := range pdbEntry.Chains { if !isChainUsable(chain) { continue } fasEntry := seq.Sequence{ Name: chainHeader(chain), Residues: chain.Sequence, } fasEntries = append(fasEntries, fasEntry) } } if len(fasEntries) == 0 { util.Fatalf("Could not find any chains with amino acids.") } var fasOut io.Writer if flag.NArg() == 1 { fasOut = os.Stdout } else { if len(flagSplit) > 0 { util.Fatalf("The '--split' option is incompatible with a single " + "output file.") } fasOut = util.CreateFile(util.Arg(1)) } if len(flagSplit) == 0 { util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries), "Could not write FASTA file '%s'", fasOut) } else { for _, entry := range fasEntries { fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name)) out := util.CreateFile(fp) w := fasta.NewWriter(out) util.Assert(w.Write(entry), "Could not write to '%s'", fp) util.Assert(w.Flush(), "Could not write to '%s'", fp) } } }
func mkSeqHMM(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Stores intermediate files produced by hhmake. tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm") util.Assert(err, "Could not create temporary directory.") defer os.RemoveAll(tempDir) // Initialize a MSA for each structural fragment. var msas []seq.MSA var msaChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { msa := seq.NewMSA() msa.SetLen(structLib.FragmentSize()) msas = append(msas, msa) msaChans = append(msaChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. for i := 0; i < structLib.Size(); i++ { addToMSA(msaChans[i], &msas[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nil, msaChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. for i := 0; i < structLib.Size(); i++ { close(msaChans[i]) } wgSeqFragments.Wait() util.Verbosef("Building profile HMMs from MSAs...") // Finally, add the sequence fragments to a new sequence fragment // library and save. hmms := make([]*seq.HMM, structLib.Size()) hhmake := func(i int) struct{} { fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i)) f := util.CreateFile(fname) util.Assert(msa.WriteFasta(f, msas[i])) hhm, err := hhsuite.HHMakePseudo.Run(fname) util.Assert(err) hmms[i] = hhm.HMM return struct{}{} // my unifier sucks, i guess } fun.ParMap(hhmake, fun.Range(0, structLib.Size())) lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }