func main() { if len(util.FlagCpuProf) > 0 { f := util.CreateFile(util.FlagCpuProf) pprof.StartCPUProfile(f) defer f.Close() defer pprof.StopCPUProfile() } // Read all CATH domains, the best-of-all matrix, and the matrix for // each aligner. domains := readDomains(util.Arg(0)) boa := readMatrix(domains, util.Arg(1)) aligners := make([]aligner, 0) flibs := make([]flib, 0) for i := 2; i < util.NArg(); i += 2 { fpath := util.Arg(i) if path.Ext(fpath) == ".bowdb" { db := util.OpenBowDB(fpath) records, err := db.ReadAll() util.Assert(err) bowed := make([]bow.Bowed, domains.in.Len()) for _, b := range records { if !domains.in.Exists(b.Id) { util.Fatalf("Found ID in bowdb that isn't in the list "+ "of CATH domains provided: %s", b.Id) } bowed[domains.in.Atom(b.Id)] = b } flibs = append(flibs, flib{db, bowed, util.Arg(i + 1)}) } else { aligners = append(aligners, aligner{ readMatrix(domains, fpath), util.Arg(i + 1), }) } } // Now remove CATH domains that don't have a corresponding structure file. // We don't do this initially since the matrix files are indexed with // respect to all CATH domains (includings ones without structure). // This is an artifact of the fact that the matrices were generated with // a very old version of CATH. domains.removeOldDomains() if a := matrixAuc(domains, boa, boa, flagThreshold); a != 1.0 { util.Fatalf("Something is wrong. The AUC of the best-of-all matrix "+ "with respect to itself is %f, but it should be 1.0.", a) } if len(aligners) > 0 { fmt.Println("Computing AUC for aligners...") writeAuc := func(aligner aligner) struct{} { w := util.CreateFile(aligner.outpath) a := matrixAuc(domains, boa, aligner.matrix, flagThreshold) fmt.Fprintf(w, "%f\n", a) return struct{}{} } fun.ParMap(writeAuc, aligners) } if len(flibs) > 0 { fmt.Println("Computing AUC for bowdbs...") writeAuc := func(flib flib) struct{} { w := util.CreateFile(flib.outpath) a := flibAuc(domains, boa, flib, flagThreshold) fmt.Fprintf(w, "%f\n", a) return struct{}{} } fun.ParMap(writeAuc, flibs) } }
func mkSeqHMM(c *command) { c.assertLeastNArg(3) structLib := util.StructureLibrary(c.flags.Arg(0)) outPath := c.flags.Arg(1) entries := c.flags.Args()[2:] util.AssertOverwritable(outPath, flagOverwrite) saveto := util.CreateFile(outPath) // Stores intermediate files produced by hhmake. tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm") util.Assert(err, "Could not create temporary directory.") defer os.RemoveAll(tempDir) // Initialize a MSA for each structural fragment. var msas []seq.MSA var msaChans []chan seq.Sequence for i := 0; i < structLib.Size(); i++ { msa := seq.NewMSA() msa.SetLen(structLib.FragmentSize()) msas = append(msas, msa) msaChans = append(msaChans, make(chan seq.Sequence)) } // Now spin up a goroutine for each fragment that is responsible for // adding a sequence slice to itself. for i := 0; i < structLib.Size(); i++ { addToMSA(msaChans[i], &msas[i]) } // Create a channel that sends the PDB entries given. entryChan := make(chan string) go func() { for _, fp := range entries { entryChan <- fp } close(entryChan) }() progress := util.NewProgress(len(entries)) for i := 0; i < flagCpu; i++ { wgPDBChains.Add(1) go func() { for entryPath := range entryChan { _, chains, err := util.PDBOpen(entryPath) progress.JobDone(err) if err != nil { continue } for _, chain := range chains { structureToSequence(structLib, chain, nil, msaChans) } } wgPDBChains.Done() }() } wgPDBChains.Wait() progress.Close() // We've finishing reading all the PDB inputs. Now close the channels // and let the sequence fragments finish. for i := 0; i < structLib.Size(); i++ { close(msaChans[i]) } wgSeqFragments.Wait() util.Verbosef("Building profile HMMs from MSAs...") // Finally, add the sequence fragments to a new sequence fragment // library and save. hmms := make([]*seq.HMM, structLib.Size()) hhmake := func(i int) struct{} { fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i)) f := util.CreateFile(fname) util.Assert(msa.WriteFasta(f, msas[i])) hhm, err := hhsuite.HHMakePseudo.Run(fname) util.Assert(err) hmms[i] = hhm.HMM return struct{}{} // my unifier sucks, i guess } fun.ParMap(hhmake, fun.Range(0, structLib.Size())) lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms) util.Assert(err) util.Assert(fragbag.Save(saveto, lib)) }