Ejemplo n.º 1
0
// main writes a tab-separated file describing, for each contiguous window
// produced by newContiguous over the first two aligned sequences and the
// first two PDB chains, the best matching library fragment of each window
// and the RMSD between those two fragments.
//
// Positional arguments (exactly four are required):
//
//	0: fragment library
//	1: alignment file
//	2: PDB file
//	3: output path
//
// Windows whose two best fragments are identical are skipped unless the
// all-fragments flag is set.
func main() {
	flag.BoolVar(&flagAllFragments, "all-fragments", flagAllFragments,
		"When set, all fragments will be shown, even if the best fragment\n"+
			"of each ATOM set is the same.")
	util.FlagParse(
		"fraglib align.{fasta,ali,a2m,a3m} pdb-file out-csv",
		"Writes a CSV file to out-csv containing the best matching fragment\n"+
			"for each pairwise contiguous set of alpha-carbon atoms of the\n"+
			"first two proteins in the alignment and PDB file.")
	util.AssertNArg(4)
	flib := util.StructureLibrary(util.Arg(0))
	aligned := util.MSA(util.Arg(1))
	pentry := util.PDBRead(util.Arg(2))

	// Chains 0 and 1 are both indexed below; report a readable error rather
	// than crashing with an index-out-of-range panic.
	if len(pentry.Chains) < 2 {
		util.Fatalf("PDB file '%s' contains %d chain(s), but at least two "+
			"are required.", util.Arg(2), len(pentry.Chains))
	}

	outPath := util.Arg(3)
	outcsv := util.CreateFile(outPath)

	csvWriter := csv.NewWriter(outcsv)
	csvWriter.Comma = '\t'

	// pf writes a single record and aborts on failure. The message names the
	// output path, not the file handle, so it is readable.
	pf := func(record ...string) {
		util.Assert(csvWriter.Write(record),
			"Problem writing to '%s'", outPath)
	}
	pf("start1", "end1", "start2", "end2", "frag1", "frag2", "frag_rmsd")
	iter := newContiguous(
		flib.FragmentSize(),
		aligned.GetFasta(0), aligned.GetFasta(1),
		pentry.Chains[0], pentry.Chains[1])
	for iter.next() {
		best1 := flib.BestStructureFragment(iter.atoms1)
		best2 := flib.BestStructureFragment(iter.atoms2)
		if !flagAllFragments && best1 == best2 {
			continue
		}
		bestRmsd := structure.RMSD(flib.Atoms(best1), flib.Atoms(best2))
		pf(
			fmt.Sprintf("%d", iter.s1()),
			fmt.Sprintf("%d", iter.e1()),
			fmt.Sprintf("%d", iter.s2()),
			fmt.Sprintf("%d", iter.e2()),
			fmt.Sprintf("%d", best1),
			fmt.Sprintf("%d", best2),
			fmt.Sprintf("%f", bestRmsd),
		)
	}

	// csv.Writer buffers its output, and Flush itself returns nothing: any
	// failure while flushing is only visible through Error. Check it so a
	// truncated output file is never produced silently.
	csvWriter.Flush()
	util.Assert(csvWriter.Error(), "Problem writing to '%s'", outPath)
}
Ejemplo n.º 2
0
// RMSDChains behaves like RMSD but takes *Chain values directly. For each
// chain it selects carbon-alpha atoms via SequenceCaAtomSlice(start-1, end)
// and hands the two selections to structure.RMSD.
//
// An error is returned when either range selects no carbon-alpha atoms, or
// when the two ranges select a different number of atoms.
func RMSDChains(chain1 *Chain, start1, end1 int,
	chain2 *Chain, start2, end2 int) (float64, error) {

	// noAtoms builds the error reported when a range selects nothing from
	// the given chain.
	noAtoms := func(chain *Chain, start, end int) error {
		return fmt.Errorf("The range '%d-%d' (for chain %c in %s) does "+
			"not correspond to any carbon-alpha ATOM records.",
			start, end, chain.Ident, chain.Entry.Path)
	}

	// The lower bound is shifted down by one before slicing (presumably the
	// caller's ranges are 1-based and inclusive; confirm against
	// SequenceCaAtomSlice).
	cas1 := chain1.SequenceCaAtomSlice(start1-1, end1)
	cas2 := chain2.SequenceCaAtomSlice(start2-1, end2)

	switch {
	case len(cas1) == 0:
		// A nil selection also has length zero, so this covers both cases.
		return 0.0, noAtoms(chain1, start1, end1)
	case len(cas2) == 0:
		return 0.0, noAtoms(chain2, start2, end2)
	case len(cas1) != len(cas2):
		// RMSD is only computed over equally sized atom sets.
		return 0.0, fmt.Errorf("The range '%d-%d' (%d ATOM records for chain "+
			"%c in %s) does not correspond to the same number of carbon-alpha "+
			"atoms as the range '%d-%d' (%d ATOM records for chain %c in %s). "+
			"It is possible that the PDB file does not contain a carbon-alpha "+
			"atom for every residue index in the ranges.",
			start1, end1, len(cas1), chain1.Ident, chain1.Entry.Path,
			start2, end2, len(cas2), chain2.Ident, chain2.Entry.Path)
	}

	return structure.RMSD(cas1, cas2), nil
}
Ejemplo n.º 3
0
// main reads the fragment map named by the first argument, compares every
// fragment's carbon-alpha atoms against the matching region of the query
// chain, and prints summary statistics.
//
// A fragment counts as a true positive when its RMSD against the query
// region is at most flagRmsd. Fragments flagged as corrupt, and fragments
// whose query region yields no atom slice, are tallied separately and left
// out of the total. Coverage is the share of stats entries (presumably one
// per query residue; confirm against newSequenceStats) with at least one
// true positive, relative to the query sequence length.
func main() {
	path := util.Arg(0)

	fragMap := util.FmapRead(path)
	query := getPdbChain(path)
	stats := newSequenceStats(query.Sequence)

	var compared, truePositives int
	var queryCorrupt, templateCorrupt int
	for _, segment := range fragMap.Segments {
		for _, frag := range segment.Frags {
			h := frag.Hit

			// Template side is unusable: record it and move on.
			if frag.IsCorrupt() {
				templateCorrupt++
				stats.incTCorrupt(h)
				continue
			}

			// Query side is unusable when no atom slice comes back.
			queryAtoms := query.SequenceCaAtomSlice(h.QueryStart-1, h.QueryEnd)
			if queryAtoms == nil {
				queryCorrupt++
				stats.incQCorrupt(h)
				continue
			}

			// Atom sets of different sizes cannot be compared; this is
			// treated as a fatal inconsistency.
			nQuery, nTemplate := len(queryAtoms), len(frag.CaAtoms)
			if nQuery != nTemplate {
				util.Fatalf("Uncomparable lengths. Query is (%d, %d) while "+
					"template is (%d, %d). Length of query CaAtoms: %d, "+
					"length of template CaAtoms: %d",
					h.QueryStart, h.QueryEnd,
					h.TemplateStart, h.TemplateEnd,
					nQuery, nTemplate)
			}

			rmsd := structure.RMSD(queryAtoms, frag.CaAtoms)
			if rmsd <= flagRmsd {
				truePositives++
				stats.incTruePs(h)
			}
			compared++
			stats.incTotal(h)
		}
	}

	// Count stats entries that were hit by at least one true positive.
	covered := 0
	for _, entry := range stats {
		if entry.trueps < 1 {
			continue
		}
		covered++
	}
	residues := len(query.Sequence)
	coverage := float64(covered) / float64(residues)
	precision := float64(truePositives) / float64(compared)

	fmt.Printf("RMSDThreshold: %f\n", flagRmsd)
	fmt.Printf("TotalFragments: %d\n", compared)
	fmt.Printf("TruePositives: %d\n", truePositives)
	fmt.Printf("Precision: %f\n", precision)
	fmt.Printf("CorruptQuery: %d\n", queryCorrupt)
	fmt.Printf("CorruptTemplate: %d\n", templateCorrupt)
	fmt.Printf("TotalResidues: %d\n", residues)
	fmt.Printf("CoveredResidues: %d\n", covered)
	fmt.Printf("Coverage: %f\n", coverage)
}