Ejemplo n.º 1
0
// structureToSequence slides a window of lib.FragmentSize() residues over
// the given chain. Every window whose alpha-carbon atoms are all present is
// matched against the structure library, and the residues covered by that
// window are sent on the channel in seqChans indexed by the best matching
// fragment.
//
// When nullChan is non-nil, the chain's full sequence is sent on it first.
// Chains shorter than the fragment size are reported via util.Verbosef and
// otherwise ignored (nothing is sent on any channel).
func structureToSequence(
	lib fragbag.StructureLibrary,
	chain *pdb.Chain,
	nullChan chan seq.Sequence,
	seqChans []chan seq.Sequence,
) {
	residues := chain.AsSequence()
	size := lib.FragmentSize()

	// A chain that cannot hold even one window is useless to us.
	if residues.Len() < size {
		util.Verbosef("Sequence '%s' is too short (length: %d)",
			residues.Name, residues.Len())
		return
	}

	// Feed the null model, if one is being accumulated.
	if nullChan != nil {
		nullChan <- residues
	}

	// SequenceCaAtoms is called exactly once, up front, rather than once
	// per window: it is an expensive call because it has to reconcile
	// inconsistencies between SEQRES and ATOM records in PDB files.
	lastStart := residues.Len() - size
	cas := chain.SequenceCaAtoms()

	// Scratch buffer reused for every window; it is overwritten in place.
	window := make([]structure.Coords, size)

scan:
	for lo := 0; lo <= lastStart; lo++ {
		hi := lo + size
		for i, ca := range cas[lo:hi] {
			if ca == nil {
				// A missing atom (a "disordered" residue perhaps) means
				// this window is not contiguous, so move on to the next.
				continue scan
			}
			window[i] = *ca
		}

		best := lib.BestStructureFragment(window)
		piece := residues.Slice(lo, hi)
		seqChans[best] <- piece
	}
}
Ejemplo n.º 2
0
// mkSeqHMM builds a sequence HMM fragment library from a structure fragment
// library and a set of PDB entries.
//
// Command arguments (at least three are required):
//
//	0:  the structure library to load
//	1:  the path the resulting library is saved to
//	2+: the PDB entries to read chains from
//
// Every chain is cut into windows by structureToSequence; each window's
// residues are routed to the MSA of its best matching structural fragment.
// Each MSA is then written to a temporary FASTA file and turned into a
// profile HMM with hhsuite.HHMakePseudo, and the HMMs are saved as a library
// named after the structure library.
//
// wgPDBChains and wgSeqFragments are declared outside this function.
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA (and the channel feeding it) for each structural
	// fragment. The local is deliberately not called "msa" so that it does
	// not shadow the msa package used further down.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		m := seq.NewMSA()
		m.SetLen(structLib.FragmentSize())
		msas = append(msas, m)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now hook up a consumer for each fragment that is responsible for
	// adding sequence slices to that fragment's MSA.
	// NOTE(review): addToMSA is defined elsewhere; presumably it starts the
	// consuming goroutine and registers it with wgSeqFragments — confirm.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	// Start flagCpu workers that read PDB entries and categorize every
	// chain. Entries that fail to open are reported to the progress
	// tracker and skipped.
	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finished reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))
		// Close the FASTA file before handing its path to hhmake: this
		// releases the descriptor (one is opened per fragment) and
		// surfaces any error from finishing the write.
		util.Assert(f.Close())

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // my unifier sucks, i guess
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
Ejemplo n.º 3
-1
// main computes a map for every FASTA input and writes each one to
// "<name>.fmap" inside the output directory, where <name> is the Name field
// of the computed map.
//
// Arguments: the first is the output directory (created if missing); the
// remaining ones are the FASTA inputs. The inputs are distributed over
// max(1, GOMAXPROCS) worker goroutines through an unbuffered channel.
func main() {
	outDir := util.Arg(0)
	fasInps := util.Args()[1:]

	util.Assert(os.MkdirAll(outDir, 0777))

	fastaChan := make(chan string)
	wg := new(sync.WaitGroup)
	for i := 0; i < max(1, runtime.GOMAXPROCS(0)); i++ {
		// Register the worker before it is started. Calling Add from
		// inside the goroutine races with wg.Wait below: a worker that has
		// not been scheduled yet would not be counted.
		wg.Add(1)
		go func() {
			defer wg.Done()
			for fasta := range fastaChan {
				util.Verbosef("Computing map for '%s'...", fasta)
				fmap := util.GetFmap(fasta)
				outF := path.Join(outDir, fmt.Sprintf("%s.fmap", fmap.Name))
				// NOTE(review): the handle returned by util.CreateFile is
				// not closed here — confirm whether util.FmapWrite closes it.
				util.FmapWrite(util.CreateFile(outF), fmap)
			}
		}()
	}

	for _, fasta := range fasInps {
		fastaChan <- fasta
	}

	// Closing the channel ends every worker's range loop; then wait for
	// the in-flight maps to be written.
	close(fastaChan)
	wg.Wait()
}