func mkPaired(c *command) { c.assertNArg(2) in := util.Library(c.flags.Arg(0)) outPath := c.flags.Arg(1) util.AssertOverwritable(outPath, flagOverwrite) if _, ok := in.(fragbag.WeightedLibrary); ok { util.Fatalf("%s is a weighted library (not allowed)", in.Name()) } name := fmt.Sprintf("paired-%s", in.Name()) if fragbag.IsStructure(in) { var pairs [][]structure.Coords lib := in.(fragbag.StructureLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Atoms(i), lib.Atoms(j) pairs = append(pairs, append(f1, f2...)) } } pairLib, err := fragbag.NewStructureAtoms(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "hmm") { var pairs []*seq.HMM lib := in.(fragbag.SequenceLibrary) nfrags := lib.Size() for i := 0; i < nfrags; i++ { for j := 0; j < nfrags; j++ { if i == j { continue } f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM) pairs = append(pairs, seq.HMMCat(f1, f2)) } } pairLib, err := fragbag.NewSequenceHMM(name, pairs) util.Assert(err) fragbag.Save(util.CreateFile(outPath), pairLib) } else if strings.Contains(in.Tag(), "profile") { util.Fatalf("Sequence profiles not implemented.") } else { util.Fatalf("Unrecognized fragment library: %s", in.Tag()) } }
func mkStructure(c *command) { c.assertNArg(2) brkFile := c.flags.Arg(0) saveto := c.flags.Arg(1) util.AssertOverwritable(saveto, flagOverwrite) brkContents, err := ioutil.ReadAll(util.OpenFile(c.flags.Arg(0))) util.Assert(err) pdbFragments := bytes.Split(brkContents, []byte("TER")) fragments := make([][]structure.Coords, 0) for i, pdbFrag := range pdbFragments { pdbFrag = bytes.TrimSpace(pdbFrag) if len(pdbFrag) == 0 { continue } fragments = append(fragments, coords(i, pdbFrag)) } libName := stripExt(path.Base(brkFile)) lib, err := fragbag.NewStructureAtoms(libName, fragments) util.Assert(err) fragbag.Save(util.CreateFile(saveto), lib) }
// Create creates a new BOW database on disk at 'dir'. If the directory // already exists or cannot be created, an error is returned. // // When you're finished adding entries, you must call Close. // // Once a BOW database is created, it cannot be modified. (This restriction // may be lifted in the future.) func Create(lib fragbag.Library, fpath string) (*DB, error) { if _, err := os.Stat(fpath); err == nil || !os.IsNotExist(err) { return nil, fmt.Errorf("BOW database '%s' already exists.", fpath) } outf, err := os.Create(fpath) if err != nil { return nil, err } db := &DB{ Lib: lib, Name: path.Base(fpath), tw: tar.NewWriter(outf), saveBuf: new(bytes.Buffer), writeBuf: new(bytes.Buffer), entryChan: make(chan bow.Bowed), writingDone: make(chan struct{}), } // Put all bow DB files in a directory within the archive. hdrDir := db.newHdrDir(db.dirName()) if err := db.tw.WriteHeader(hdrDir); err != nil { return nil, err } // Create an entry for the fragment library. Copy the bytes. flibBytes := new(bytes.Buffer) if err := fragbag.Save(flibBytes, db.Lib); err != nil { return nil, fmt.Errorf("Could not copy fragment library: %s", err) } hdr := db.newHdr(fileFragLib, flibBytes.Len()) if err := db.tw.WriteHeader(hdr); err != nil { return nil, err } if _, err := db.tw.Write(flibBytes.Bytes()); err != nil { return nil, err } // Now spin up a goroutine that is responsible for writing entries. go func() { for entry := range db.entryChan { if err = db.write(entry); err != nil { log.Printf("Could not write to %s: %s", fileBowDB, err) } } db.writingDone <- struct{}{} }() return db, nil }
func mkSeqProfile(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Initialize a frequency and null profile for each structural fragment. var freqProfiles []*seq.FrequencyProfile var fpChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { fp := seq.NewFrequencyProfile(structLib.FragmentSize()) freqProfiles = append(freqProfiles, fp) fpChans = append(fpChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. nullChan, nullProfile := addToNull() for i := 0; i < structLib.Size(); i++ { addToProfile(fpChans[i], freqProfiles[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nullChan, fpChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. close(nullChan) for i := 0; i < structLib.Size(); i++ { close(fpChans[i]) } wgSeqFragments.Wait() // Finally, add the sequence fragments to a new sequence fragment // library and save. profs := make([]*seq.Profile, structLib.Size()) for i := 0; i < structLib.Size(); i++ { profs[i] = freqProfiles[i].Profile(nullProfile) } lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }
func mkSeqHMM(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Stores intermediate files produced by hhmake. tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm") util.Assert(err, "Could not create temporary directory.") defer os.RemoveAll(tempDir) // Initialize a MSA for each structural fragment. var msas []seq.MSA var msaChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { msa := seq.NewMSA() msa.SetLen(structLib.FragmentSize()) msas = append(msas, msa) msaChans = append(msaChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. for i := 0; i < structLib.Size(); i++ { addToMSA(msaChans[i], &msas[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nil, msaChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. for i := 0; i < structLib.Size(); i++ { close(msaChans[i]) } wgSeqFragments.Wait() util.Verbosef("Building profile HMMs from MSAs...") // Finally, add the sequence fragments to a new sequence fragment // library and save. hmms := make([]*seq.HMM, structLib.Size()) hhmake := func(i int) struct{} { fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i)) f := util.CreateFile(fname) util.Assert(msa.WriteFasta(f, msas[i])) hhm, err := hhsuite.HHMakePseudo.Run(fname) util.Assert(err) hmms[i] = hhm.HMM return struct{}{} // my unifier sucks, i guess } fun.ParMap(hhmake, fun.Range(0, structLib.Size())) lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }