Пример #1
0
func main() {
	flag.BoolVar(&flagAllFragments, "all-fragments", flagAllFragments,
		"When set, all fragments will be shown, even if the best fragment\n"+
			"of each ATOM set is the same.")
	util.FlagParse(
		"fraglib align.{fasta,ali,a2m,a3m} pdb-file out-csv",
		"Writes a CSV file to out-csv containing the best matching fragment\n"+
			"for each pairwise contiguous set of alpha-carbon atoms of the\n"+
			"first two proteins in the alignment and PDB file.")
	util.AssertNArg(4)
	flib := util.StructureLibrary(util.Arg(0))
	aligned := util.MSA(util.Arg(1))
	pentry := util.PDBRead(util.Arg(2))
	outcsv := util.CreateFile(util.Arg(3))

	csvWriter := csv.NewWriter(outcsv)
	csvWriter.Comma = '\t'
	defer csvWriter.Flush()

	pf := func(record ...string) {
		util.Assert(csvWriter.Write(record), "Problem writing to '%s'", outcsv)
	}
	pf("start1", "end1", "start2", "end2", "frag1", "frag2", "frag_rmsd")
	iter := newContiguous(
		flib.FragmentSize(),
		aligned.GetFasta(0), aligned.GetFasta(1),
		pentry.Chains[0], pentry.Chains[1])
	for iter.next() {
		best1 := flib.BestStructureFragment(iter.atoms1)
		best2 := flib.BestStructureFragment(iter.atoms2)
		if !flagAllFragments && best1 == best2 {
			continue
		}
		bestRmsd := structure.RMSD(flib.Atoms(best1), flib.Atoms(best2))
		pf(
			fmt.Sprintf("%d", iter.s1()),
			fmt.Sprintf("%d", iter.e1()),
			fmt.Sprintf("%d", iter.s2()),
			fmt.Sprintf("%d", iter.e2()),
			fmt.Sprintf("%d", best1),
			fmt.Sprintf("%d", best2),
			fmt.Sprintf("%f", bestRmsd),
		)
	}
}
Пример #2
0
func main() {
	libPath := util.Arg(0)
	chain := util.Arg(1)
	pdbEntryPath := util.Arg(2)
	bowOut := util.Arg(3)

	lib := util.StructureLibrary(libPath)
	entry := util.PDBRead(pdbEntryPath)

	thechain := entry.Chain(chain[0])
	if thechain == nil || !thechain.IsProtein() {
		util.Fatalf("Could not find chain with identifier '%c'.", chain[0])
	}

	bow := bow.BowerFromChain(thechain).StructureBow(lib)
	if bowOut == "--" {
		fmt.Println(bow)
	} else {
		util.BowWrite(util.CreateFile(bowOut), bow)
	}
}
Пример #3
0
func main() {
	lib = util.StructureLibrary(util.Arg(0))
	pdbEntry := util.PDBRead(util.Arg(1))

	if util.NArg() == 2 {
		for _, chain := range pdbEntry.Chains {
			atoms := chain.CaAtoms()
			bestFragsForRegion(chain, atoms, 0, len(atoms))
		}
	} else {
		chainId := util.Arg(2)
		chain := pdbEntry.Chain(chainId[0])
		if chain == nil || !chain.IsProtein() {
			util.Fatalf("Could not find protein chain with id '%c'.", chainId)
		}
		atoms := chain.CaAtoms()

		if util.NArg() == 3 {
			bestFragsForRegion(chain, atoms, 0, len(atoms))
		} else {
			if util.NArg() != 5 {
				log.Println("Both a start and end must be provided.")
				util.Usage()
			}

			s, e := util.Arg(3), util.Arg(4)
			sn, en := util.ParseInt(s)-1, util.ParseInt(e)
			if en-sn < lib.FragmentSize() {
				util.Fatalf("The range [%s, %s] specifies %d alpha-carbon "+
					"atoms while at least %d alpha-carbon atoms are required "+
					"for the given fragment library.",
					s, e, en-sn, lib.FragmentSize())
			}
			bestFragsForRegion(chain, atoms, sn, en)
		}
	}
}
Пример #4
0
func mkSeqProfile(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Initialize a frequency and null profile for each structural fragment.
	var freqProfiles []*seq.FrequencyProfile
	var fpChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		fp := seq.NewFrequencyProfile(structLib.FragmentSize())
		freqProfiles = append(freqProfiles, fp)
		fpChans = append(fpChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	nullChan, nullProfile := addToNull()
	for i := 0; i < structLib.Size(); i++ {
		addToProfile(fpChans[i], freqProfiles[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nullChan, fpChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finishing reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	close(nullChan)
	for i := 0; i < structLib.Size(); i++ {
		close(fpChans[i])
	}
	wgSeqFragments.Wait()

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	profs := make([]*seq.Profile, structLib.Size())
	for i := 0; i < structLib.Size(); i++ {
		profs[i] = freqProfiles[i].Profile(nullProfile)
	}
	lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
Пример #5
0
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA for each structural fragment.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		msa := seq.NewMSA()
		msa.SetLen(structLib.FragmentSize())
		msas = append(msas, msa)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finishing reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // my unifier sucks, i guess
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
Пример #6
0
func main() {
	lib := util.StructureLibrary(util.Arg(0))
	fmap := util.FmapRead(util.Arg(1))
	util.BowWrite(util.CreateFile(util.Arg(2)), fmap.StructureBow(lib))
}
Пример #7
0
func main() {
	libFile := util.Arg(0)
	brkFile := util.Arg(1)
	lib := util.StructureLibrary(libFile)

	stderrf("Loading PDB files into memory...\n")
	entries := make([]*pdb.Entry, util.NArg()-2)
	for i, pdbfile := range flag.Args()[2:] {
		entries[i] = util.PDBRead(pdbfile)
	}

	stderrf("Comparing the results of old fragbag and new fragbag on " +
		"each PDB file...\n")
	for _, entry := range entries {
		stderrf("Testing %s...\n", entry.Path)
		fmt.Printf("Testing %s\n", entry.Path)

		// Try to run old fragbag first. The output is an old-style BOW.
		oldBowStr, err := runOldFragbag(brkFile, entry.Path, lib.Size(),
			lib.FragmentSize())
		if err != nil {
			fmt.Println(err)
			fmt.Printf("The output was:\n%s\n", oldBowStr)
			divider()
			continue
		}

		oldBow, err := bow.NewOldStyleBow(lib.Size(), oldBowStr)
		if err != nil {
			fmt.Printf("Could not parse the following as an old style "+
				"BOW:\n%s\n", oldBowStr)
			fmt.Printf("%s\n", err)
			divider()
			continue
		}

		// Now use package fragbag to compute a BOW.
		var newBow bow.Bow
		if flagOldStyle {
			newBow = oldStyle{entry}.StructureBow(lib)
		} else {
			newBow = newStyle{entry}.StructureBow(lib)
		}

		// Create a diff and check if they are the same. If so, we passed.
		// Otherwise, print an error report.
		diff := bow.NewBowDiff(oldBow, newBow)
		if diff.IsSame() {
			fmt.Println("PASSED.")
			divider()
			continue
		}

		// Ruh roh...
		fmt.Println("FAILED.")
		fmt.Printf("\nOld BOW:\n%s\n\nNew BOW:\n%s\n", oldBow, newBow)
		fmt.Printf("\nDiff:\n%s\n", diff)
		divider()
	}
	stderrf("Done!\n")
}