func main() { outDir := util.Arg(0) fasInps := util.Args()[1:] util.Assert(os.MkdirAll(outDir, 0777)) fastaChan := make(chan string) wg := new(sync.WaitGroup) for i := 0; i < max(1, runtime.GOMAXPROCS(0)); i++ { go func() { wg.Add(1) for fasta := range fastaChan { util.Verbosef("Computing map for '%s'...", fasta) fmap := util.GetFmap(fasta) outF := path.Join(outDir, fmt.Sprintf("%s.fmap", fmap.Name)) util.FmapWrite(util.CreateFile(outF), fmap) } wg.Done() }() } for _, fasta := range fasInps { fastaChan <- fasta } close(fastaChan) wg.Wait() }
// structureToSequence uses structural fragments to categorize a segment // of alpha-carbon atoms, and adds the corresponding residues to a // corresponding sequence fragment. func structureToSequence( lib fragbag.StructureLibrary, chain *pdb.Chain, nullChan chan seq.Sequence, seqChans []chan seq.Sequence, ) { sequence := chain.AsSequence() fragSize := lib.FragmentSize() // If the chain is shorter than the fragment size, we can do nothing // with it. if sequence.Len() < fragSize { util.Verbosef("Sequence '%s' is too short (length: %d)", sequence.Name, sequence.Len()) return } // If we're accumulating a null model, add this sequence to it. if nullChan != nil { nullChan <- sequence } // This bit of trickery here is all about getting the call to // SequenceCaAtoms outside of the loop. In particular, it's a very // expensive call since it has to reconcile inconsistencies between // SEQRES and ATOM records in PDB files. limit := sequence.Len() - fragSize atoms := chain.SequenceCaAtoms() atomSlice := make([]structure.Coords, fragSize) noGaps := func(atoms []*structure.Coords) []structure.Coords { for i, atom := range atoms { if atom == nil { return nil } atomSlice[i] = *atom } return atomSlice } for start := 0; start <= limit; start++ { end := start + fragSize cas := noGaps(atoms[start:end]) if cas == nil { // Nothing contiguous was found (a "disordered" residue perhaps). // So skip this part of the chain. continue } bestFrag := lib.BestStructureFragment(atomSlice) sliced := sequence.Slice(start, end) seqChans[bestFrag] <- sliced } }
func mkSeqHMM(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Stores intermediate files produced by hhmake. tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm") util.Assert(err, "Could not create temporary directory.") defer os.RemoveAll(tempDir) // Initialize a MSA for each structural fragment. var msas []seq.MSA var msaChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { msa := seq.NewMSA() msa.SetLen(structLib.FragmentSize()) msas = append(msas, msa) msaChans = append(msaChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. for i := 0; i < structLib.Size(); i++ { addToMSA(msaChans[i], &msas[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nil, msaChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. for i := 0; i < structLib.Size(); i++ { close(msaChans[i]) } wgSeqFragments.Wait() util.Verbosef("Building profile HMMs from MSAs...") // Finally, add the sequence fragments to a new sequence fragment // library and save. hmms := make([]*seq.HMM, structLib.Size()) hhmake := func(i int) struct{} { fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i)) f := util.CreateFile(fname) util.Assert(msa.WriteFasta(f, msas[i])) hhm, err := hhsuite.HHMakePseudo.Run(fname) util.Assert(err) hmms[i] = hhm.HMM return struct{}{} // my unifier sucks, i guess } fun.ParMap(hhmake, fun.Range(0, structLib.Size())) lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }