func main() { flag.BoolVar(&flagAllFragments, "all-fragments", flagAllFragments, "When set, all fragments will be shown, even if the best fragment\n"+ "of each ATOM set is the same.") util.FlagParse( "fraglib align.{fasta,ali,a2m,a3m} pdb-file out-csv", "Writes a CSV file to out-csv containing the best matching fragment\n"+ "for each pairwise contiguous set of alpha-carbon atoms of the\n"+ "first two proteins in the alignment and PDB file.") util.AssertNArg(4) flib := util.StructureLibrary(util.Arg(0)) aligned := util.MSA(util.Arg(1)) pentry := util.PDBRead(util.Arg(2)) outcsv := util.CreateFile(util.Arg(3)) csvWriter := csv.NewWriter(outcsv) csvWriter.Comma = '\t' defer csvWriter.Flush() pf := func(record ...string) { util.Assert(csvWriter.Write(record), "Problem writing to '%s'", outcsv) } pf("start1", "end1", "start2", "end2", "frag1", "frag2", "frag_rmsd") iter := newContiguous( flib.FragmentSize(), aligned.GetFasta(0), aligned.GetFasta(1), pentry.Chains[0], pentry.Chains[1]) for iter.next() { best1 := flib.BestStructureFragment(iter.atoms1) best2 := flib.BestStructureFragment(iter.atoms2) if !flagAllFragments && best1 == best2 { continue } bestRmsd := structure.RMSD(flib.Atoms(best1), flib.Atoms(best2)) pf( fmt.Sprintf("%d", iter.s1()), fmt.Sprintf("%d", iter.e1()), fmt.Sprintf("%d", iter.s2()), fmt.Sprintf("%d", iter.e2()), fmt.Sprintf("%d", best1), fmt.Sprintf("%d", best2), fmt.Sprintf("%f", bestRmsd), ) } }
// RMSDChains is the same as RMSD, except it uses *Chain values directly. func RMSDChains(chain1 *Chain, start1, end1 int, chain2 *Chain, start2, end2 int) (float64, error) { // In order to fetch the appropriate carbon-alpha atoms, we need to // traverse each chain's carbon-alpha atom slice and pick only the carbon // alpha atoms with residue indices in the range specified. struct1 := chain1.SequenceCaAtomSlice(start1-1, end1) struct2 := chain2.SequenceCaAtomSlice(start2-1, end2) // Verify that neither of the atom sets is 0. if struct1 == nil || len(struct1) == 0 { return 0.0, fmt.Errorf("The range '%d-%d' (for chain %c in %s) does "+ "not correspond to any carbon-alpha ATOM records.", start1, end1, chain1.Ident, chain1.Entry.Path) } if struct2 == nil || len(struct2) == 0 { return 0.0, fmt.Errorf("The range '%d-%d' (for chain %c in %s) does "+ "not correspond to any carbon-alpha ATOM records.", start2, end2, chain2.Ident, chain2.Entry.Path) } // If we don't have the same number of atoms from each chain, we can't // compute RMSD. if len(struct1) != len(struct2) { return 0.0, fmt.Errorf("The range '%d-%d' (%d ATOM records for chain "+ "%c in %s) does not correspond to the same number of carbon-alpha "+ "atoms as the range '%d-%d' (%d ATOM records for chain %c in %s). "+ "It is possible that the PDB file does not contain a carbon-alpha "+ "atom for every residue index in the ranges.", start1, end1, len(struct1), chain1.Ident, chain1.Entry.Path, start2, end2, len(struct2), chain2.Ident, chain2.Entry.Path) } // We're good to go... return structure.RMSD(struct1, struct2), nil }
func main() { fmapPath := util.Arg(0) fmap := util.FmapRead(fmapPath) qchain := getPdbChain(fmapPath) stats := newSequenceStats(qchain.Sequence) total, trueps := 0, 0 qcorrupt, tcorrupt := 0, 0 for _, frags := range fmap.Segments { for _, frag := range frags.Frags { hit := frag.Hit if frag.IsCorrupt() { tcorrupt += 1 stats.incTCorrupt(hit) continue } qatoms := qchain.SequenceCaAtomSlice(hit.QueryStart-1, hit.QueryEnd) if qatoms == nil { qcorrupt += 1 stats.incQCorrupt(hit) continue } if len(qatoms) != len(frag.CaAtoms) { util.Fatalf("Uncomparable lengths. Query is (%d, %d) while "+ "template is (%d, %d). Length of query CaAtoms: %d, "+ "length of template CaAtoms: %d", hit.QueryStart, hit.QueryEnd, hit.TemplateStart, hit.TemplateEnd, len(qatoms), len(frag.CaAtoms)) } if structure.RMSD(qatoms, frag.CaAtoms) <= flagRmsd { trueps += 1 stats.incTruePs(hit) } total += 1 stats.incTotal(hit) } } coveredResidues := 0 for _, resStats := range stats { if resStats.trueps >= 1 { coveredResidues += 1 } } coverage := float64(coveredResidues) / float64(len(qchain.Sequence)) fmt.Printf("RMSDThreshold: %f\n", flagRmsd) fmt.Printf("TotalFragments: %d\n", total) fmt.Printf("TruePositives: %d\n", trueps) fmt.Printf("Precision: %f\n", float64(trueps)/float64(total)) fmt.Printf("CorruptQuery: %d\n", qcorrupt) fmt.Printf("CorruptTemplate: %d\n", tcorrupt) fmt.Printf("TotalResidues: %d\n", len(qchain.Sequence)) fmt.Printf("CoveredResidues: %d\n", coveredResidues) fmt.Printf("Coverage: %f\n", coverage) }