Ejemplo n.º 1
0
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		pprof.StartCPUProfile(f)
		defer f.Close()
		defer pprof.StopCPUProfile()
	}

	// Read all CATH domains, the best-of-all matrix, and the matrix for
	// each aligner.
	domains := readDomains(util.Arg(0))
	boa := readMatrix(domains, util.Arg(1))
	aligners := make([]aligner, 0)
	flibs := make([]flib, 0)
	for i := 2; i < util.NArg(); i += 2 {
		fpath := util.Arg(i)
		if path.Ext(fpath) == ".bowdb" {
			db := util.OpenBowDB(fpath)
			records, err := db.ReadAll()
			util.Assert(err)

			bowed := make([]bow.Bowed, domains.in.Len())
			for _, b := range records {
				if !domains.in.Exists(b.Id) {
					util.Fatalf("Found ID in bowdb that isn't in the list "+
						"of CATH domains provided: %s", b.Id)
				}
				bowed[domains.in.Atom(b.Id)] = b
			}
			flibs = append(flibs, flib{db, bowed, util.Arg(i + 1)})
		} else {
			aligners = append(aligners, aligner{
				readMatrix(domains, fpath),
				util.Arg(i + 1),
			})
		}
	}
	// Now remove CATH domains that don't have a corresponding structure file.
	// We don't do this initially since the matrix files are indexed with
	// respect to all CATH domains (includings ones without structure).
	// This is an artifact of the fact that the matrices were generated with
	// a very old version of CATH.
	domains.removeOldDomains()

	if a := matrixAuc(domains, boa, boa, flagThreshold); a != 1.0 {
		util.Fatalf("Something is wrong. The AUC of the best-of-all matrix "+
			"with respect to itself is %f, but it should be 1.0.", a)
	}

	if len(aligners) > 0 {
		fmt.Println("Computing AUC for aligners...")
		writeAuc := func(aligner aligner) struct{} {
			w := util.CreateFile(aligner.outpath)
			a := matrixAuc(domains, boa, aligner.matrix, flagThreshold)
			fmt.Fprintf(w, "%f\n", a)
			return struct{}{}
		}
		fun.ParMap(writeAuc, aligners)
	}
	if len(flibs) > 0 {
		fmt.Println("Computing AUC for bowdbs...")
		writeAuc := func(flib flib) struct{} {
			w := util.CreateFile(flib.outpath)
			a := flibAuc(domains, boa, flib, flagThreshold)
			fmt.Fprintf(w, "%f\n", a)
			return struct{}{}
		}
		fun.ParMap(writeAuc, flibs)
	}
}
Ejemplo n.º 2
0
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA for each structural fragment.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		msa := seq.NewMSA()
		msa.SetLen(structLib.FragmentSize())
		msas = append(msas, msa)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finishing reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // my unifier sucks, i guess
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}