示例#1
0
文件: main.go 项目: TuftsBCB/tools
// main clusters a newick tree using a table of pairwise alignment
// distances and writes the resulting clusters as CSV.
//
// Positional arguments (read with util.Arg):
//
//	0: a directory passed to readAlignmentDists, or otherwise a file
//	   holding a GOB encoded distance table
//	1: the newick tree file
//	2: the path of the CSV output
//
// When flagGobIt is non-empty, argument 0 is read with readAlignmentDists,
// the table is GOB encoded to the path in flagGobIt and main returns
// without clustering. When util.FlagCpuProf is non-empty, a CPU profile is
// written to that path.
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		// StartCPUProfile fails if profiling is already enabled; report it
		// instead of silently producing an empty profile.
		util.Assert(pprof.StartCPUProfile(f), "Could not start CPU profile")
		defer f.Close()
		defer pprof.StopCPUProfile()
	}
	if len(flagGobIt) > 0 {
		astralDir := util.Arg(0)
		dists := readAlignmentDists(astralDir)
		gobFile := util.CreateFile(flagGobIt)
		util.Assert(gob.NewEncoder(gobFile).Encode(dists),
			"Could not GOB encode distances")
		// A failed close can mean the data never reached the disk.
		util.Assert(gobFile.Close(), "Could not close '%s'", flagGobIt)
		return
	}

	var dists *intern.Table
	if util.IsDir(util.Arg(0)) {
		dists = readAlignmentDists(util.Arg(0))
	} else {
		dec := gob.NewDecoder(util.OpenFile(util.Arg(0)))
		util.Assert(dec.Decode(&dists), "Could not GOB decode distances")
	}

	treeFile := util.Arg(1)
	outPath := util.Arg(2)

	treeReader := newick.NewReader(util.OpenFile(treeFile))
	tree, err := treeReader.ReadTree()
	util.Assert(err, "Could not read newick tree")

	csvFile := util.CreateFile(outPath)
	clusters := treeClusters(flagThreshold, dists, tree)
	// WriteAll flushes the CSV writer itself, so only the file needs closing.
	util.Assert(csv.NewWriter(csvFile).WriteAll(clusters))
	util.Assert(csvFile.Close(), "Could not close '%s'", outPath)
}
示例#2
0
文件: main.go 项目: TuftsBCB/tools
// main reads a PDBx/mmCIF file and writes the sequence of every usable
// chain in FASTA format.
//
// The input path is flag.Arg(0); it is read through gzip when the path ends
// in ".gz". A chain is emitted when isChainUsable accepts it and its entity
// has a non-empty sequence.
//
// Output goes to stdout when exactly one positional argument is given, and
// otherwise to the file named by the second argument. When flagSplit is
// non-empty it names a directory and every entry is written to its own
// "<name>.fasta" file there instead; combining it with an output file is a
// fatal error.
func main() {
	var f io.Reader
	var err error

	f = util.OpenFile(flag.Arg(0))
	if strings.HasSuffix(flag.Arg(0), ".gz") {
		f, err = gzip.NewReader(f)
		util.Assert(err)
	}
	cifEntry, err := pdbx.Read(f)
	util.Assert(err, "Could not read PDBx/mmCIF file")

	fasEntries := make([]seq.Sequence, 0, 5)
	for _, ent := range cifEntry.Entities {
		for _, chain := range ent.Chains {
			if !isChainUsable(chain) || len(ent.Seq) == 0 {
				continue
			}

			fasEntry := seq.Sequence{
				Name:     chainHeader(chain),
				Residues: ent.Seq,
			}
			fasEntries = append(fasEntries, fasEntry)
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	// outName is what error messages show; the writer itself has no useful
	// string form.
	var fasOut io.Writer
	outName := "stdout"
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		outName = util.Arg(1)
		fasOut = util.CreateFile(outName)
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", outName)
	} else {
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
			// Close per entry so descriptors do not pile up across entries.
			util.Assert(out.Close(), "Could not write to '%s'", fp)
		}
	}
}
示例#3
0
// mkPaired builds a "paired" fragment library in which every fragment is
// the concatenation of two distinct fragments (i, j with i != j, in both
// orders) of an existing library.
//
// It expects exactly two arguments: the input library and the output path.
// The new library is named "paired-<input name>". Structure libraries and
// libraries whose tag contains "hmm" are supported; weighted libraries,
// sequence profile libraries and unrecognized tags are fatal errors.
func mkPaired(c *command) {
	c.assertNArg(2)

	in := util.Library(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	util.AssertOverwritable(outPath, flagOverwrite)

	if _, ok := in.(fragbag.WeightedLibrary); ok {
		util.Fatalf("%s is a weighted library (not allowed)", in.Name())
	}

	name := fmt.Sprintf("paired-%s", in.Name())
	if fragbag.IsStructure(in) {
		var pairs [][]structure.Coords
		lib := in.(fragbag.StructureLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Atoms(i), lib.Atoms(j)
				// Copy into a fresh slice. Appending directly to f1 could
				// write into the library's backing array when it has spare
				// capacity, so every pair starting with i would share (and
				// overwrite) the same storage.
				pair := make([]structure.Coords, 0, len(f1)+len(f2))
				pair = append(pair, f1...)
				pair = append(pair, f2...)
				pairs = append(pairs, pair)
			}
		}
		pairLib, err := fragbag.NewStructureAtoms(name, pairs)
		util.Assert(err)
		util.Assert(fragbag.Save(util.CreateFile(outPath), pairLib))
	} else if strings.Contains(in.Tag(), "hmm") {
		var pairs []*seq.HMM
		lib := in.(fragbag.SequenceLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM)
				pairs = append(pairs, seq.HMMCat(f1, f2))
			}
		}
		pairLib, err := fragbag.NewSequenceHMM(name, pairs)
		util.Assert(err)
		util.Assert(fragbag.Save(util.CreateFile(outPath), pairLib))
	} else if strings.Contains(in.Tag(), "profile") {
		util.Fatalf("Sequence profiles not implemented.")
	} else {
		util.Fatalf("Unrecognized fragment library: %s", in.Tag())
	}
}
示例#4
0
文件: main.go 项目: TuftsBCB/tools
// main computes the fragment map of the FASTA file given as the first
// argument and writes it to the path given as the second argument.
func main() {
	src, dst := util.Arg(0), util.Arg(1)

	// Compute the map before creating the output file.
	result := util.GetFmap(src)
	sink := util.CreateFile(dst)
	util.FmapWrite(sink, result)
}
示例#5
0
// mkStructure builds a structure fragment library from a file of PDB
// fragments and saves it.
//
// It expects exactly two arguments: the fragment file and the output path.
// The file contents are split on the byte sequence "TER"; each non-empty
// piece (after trimming whitespace) is converted with coords and becomes
// one fragment. The library is named after the input file's base name with
// its extension stripped.
func mkStructure(c *command) {
	c.assertNArg(2)

	brkFile := c.flags.Arg(0)
	saveto := c.flags.Arg(1)

	util.AssertOverwritable(saveto, flagOverwrite)

	brkContents, err := ioutil.ReadAll(util.OpenFile(brkFile))
	util.Assert(err)

	pdbFragments := bytes.Split(brkContents, []byte("TER"))
	fragments := make([][]structure.Coords, 0)
	for i, pdbFrag := range pdbFragments {
		pdbFrag = bytes.TrimSpace(pdbFrag)
		if len(pdbFrag) == 0 {
			continue
		}
		// i is the index of the piece in the split input, so it also counts
		// the empty pieces skipped above.
		fragments = append(fragments, coords(i, pdbFrag))
	}

	libName := stripExt(path.Base(brkFile))
	lib, err := fragbag.NewStructureAtoms(libName, fragments)
	util.Assert(err)
	// Saving can fail (e.g. on a full disk); do not report success then.
	util.Assert(fragbag.Save(util.CreateFile(saveto), lib))
}
示例#6
0
// main searches a BOW database with each of its own entries and writes a
// tab-separated table of the results.
//
// The first argument is the BOW database and the second is the output
// file. Each result row holds the query ID, the result ID, and the cosine
// and euclidean values of the result; a blank line follows each query's
// rows.
func main() {
	db := util.OpenBowDB(util.Arg(0))
	out := util.CreateFile(util.Arg(1))

	emit := func(format string, v ...interface{}) {
		fmt.Fprintf(out, format, v...)
	}

	// Start from the default search options and set the limit to -1.
	opts := bowdb.SearchDefault
	opts.Limit = -1

	emit("QueryID\tResultID\tCosine\tEuclid\n")
	queries, err := db.ReadAll()
	util.Assert(err, "Could not read BOW database entries")

	for _, query := range queries {
		for _, hit := range db.Search(opts, query) {
			emit("%s\t%s\t%0.4f\t%0.4f\n",
				query.Id, hit.Bowed.Id, hit.Cosine, hit.Euclid)
		}
		emit("\n")
	}

	util.Assert(out.Close())
	util.Assert(db.Close())
}
示例#7
0
// main writes the element-wise minimum of several score matrices.
//
// The first argument is the output file; every remaining argument is an
// input matrix with one row per line and whitespace-separated floating
// point scores. The inputs are read one row at a time in lockstep, and
// reading stops as soon as any input is exhausted. A minimum of zero is
// written as "0" and any other value with the "%f" format; values on a row
// are separated by single spaces.
//
// At least one input matrix is required.
func main() {
	saveto := util.CreateFile(util.Arg(0))
	defer saveto.Close()

	w := func(format string, v ...interface{}) {
		_, err := fmt.Fprintf(saveto, format, v...)
		util.Assert(err)
	}

	var fmats []*bufio.Reader
	for _, fmat := range util.Args()[1:] {
		fmats = append(fmats, bufio.NewReader(util.OpenFile(fmat)))
	}
	// Without any input nothing below can ever hit EOF, so the row loop
	// would write empty lines forever.
	if len(fmats) == 0 {
		util.Assert(fmt.Errorf("no matrix files given"))
		return
	}
LOOP:
	for {
		var columns int
		scores := make([][]float64, len(fmats)) // matrix -> fields -> sas score
		for i, fmat := range fmats {
			line, err := fmat.ReadBytes('\n')
			if len(line) == 0 && err == io.EOF {
				break LOOP
			} else if err != io.EOF {
				util.Assert(err)
			}

			fields := bytes.Fields(line)
			// columns ends up as the field count of the last matrix read.
			columns = len(fields)
			scores[i] = make([]float64, columns)
			for j, sas := range fields {
				scores[i][j], err = strconv.ParseFloat(string(sas), 64)
				util.Assert(err)
			}
		}

		before := ""
		for j := 0; j < columns; j++ {
			best := scores[0][j]
			for i := 1; i < len(scores); i++ {
				if scores[i][j] < best {
					best = scores[i][j]
				}
			}
			if best == 0 {
				w("%s0", before)
			} else {
				w("%s%f", before, best)
			}
			before = " "
		}
		w("\n")
	}
}
示例#8
0
文件: main.go 项目: TuftsBCB/tools
// main converts a multiple sequence alignment from one format to another.
//
// The first argument is the input path and the second is the output path.
// The reader and the writer are picked by ioFromFile from each path
// together with flagInFmt and flagOutFmt respectively.
func main() {
	srcPath, dstPath := util.Arg(0), util.Arg(1)
	parse := ioFromFile(srcPath, flagInFmt).r
	emit := ioFromFile(dstPath, flagOutFmt).w

	src := util.OpenFile(srcPath)
	defer src.Close()

	alignment, err := parse(src)
	util.Assert(err, "Error parsing '%s'", srcPath)

	// The output file is only created once the input parsed successfully.
	dst := util.CreateFile(dstPath)
	defer dst.Close()
	util.Assert(emit(dst, alignment), "Error writing '%s'", dstPath)
}
示例#9
0
// main writes a tab-separated file with the best matching sequence
// fragment for each pairwise contiguous set of residues between the first
// two proteins of an alignment.
//
// Arguments: the sequence fragment library, the alignment and the output
// path. A row is skipped when both residue sets map to the same fragment
// (unless --all-fragments is set), when either has no best fragment (-1),
// or when the distance between the two alignment probabilities exceeds
// 0.14.
func main() {
	flag.BoolVar(&flagAllFragments, "all-fragments", flagAllFragments,
		"When set, all fragments will be shown, even if the best fragment\n"+
			"of each residue set is the same.")
	util.FlagParse(
		"fraglib align.{fasta,ali,a2m,a3m} out-csv",
		"Writes a CSV file to out-csv containing the best matching fragment\n"+
			"for each pairwise contiguous set of residues between the\n"+
			"first two proteins in the alignment.")
	util.AssertNArg(3)
	flib := util.SequenceLibrary(util.Arg(0))
	aligned := util.MSA(util.Arg(1))
	outPath := util.Arg(2)
	outcsv := util.CreateFile(outPath)

	csvWriter := csv.NewWriter(outcsv)
	csvWriter.Comma = '\t'

	// Error messages name the output path; the file handle itself does not
	// format usefully with %s.
	pf := func(record ...string) {
		util.Assert(csvWriter.Write(record), "Problem writing to '%s'", outPath)
	}
	pf("start1", "end1", "start2", "end2", "frag1", "frag2", "rat1", "rat2")
	iter := newContiguous(
		flib.FragmentSize(), aligned.GetFasta(0), aligned.GetFasta(1))
	for iter.next() {
		best1 := flib.BestSequenceFragment(iter.res1)
		best2 := flib.BestSequenceFragment(iter.res2)
		if !flagAllFragments && best1 == best2 {
			continue
		}
		if best1 == -1 || best2 == -1 {
			continue
		}
		p1 := flib.AlignmentProb(best1, iter.res1)
		p2 := flib.AlignmentProb(best2, iter.res2)
		if p1.Distance(p2) > 0.14 {
			continue
		}
		pf(
			fmt.Sprintf("%d", iter.s1()),
			fmt.Sprintf("%d", iter.e1()),
			fmt.Sprintf("%d", iter.s2()),
			fmt.Sprintf("%d", iter.e2()),
			fmt.Sprintf("%d", best1),
			fmt.Sprintf("%d", best2),
			fmt.Sprintf("%f", p1),
			fmt.Sprintf("%f", p2),
		)
	}

	// Flush reports nothing itself; a buffered write failure only shows up
	// through Error.
	csvWriter.Flush()
	util.Assert(csvWriter.Error(), "Problem writing to '%s'", outPath)
	util.Assert(outcsv.Close(), "Problem writing to '%s'", outPath)
}
示例#10
0
文件: main.go 项目: TuftsBCB/flib
// main dispatches to one of the registered sub-commands.
//
// The first program argument names the command. "help" (with any number of
// leading dashes) followed by a command name shows that command's help
// instead of running it. With no command name usage is called. A name that
// matches no command is logged and the program exits with status 1.
//
// Before a command runs, its flags are parsed from the remaining
// arguments, GOMAXPROCS is set from flagCpu (at least 1), and a CPU profile
// is started when flagCpuProfile is non-empty.
func main() {
	var (
		cmd  string
		help bool
	)
	switch {
	case len(os.Args) < 2:
		usage()
	case strings.TrimLeft(os.Args[1], "-") == "help":
		if len(os.Args) < 3 {
			usage()
		} else {
			cmd, help = os.Args[2], true
		}
	default:
		cmd = os.Args[1]
	}

	for _, c := range commands {
		if c.name != cmd {
			continue
		}

		c.setCommonFlags()
		if c.addFlags != nil {
			c.addFlags(c)
		}
		if help {
			c.showHelp()
			continue
		}

		c.flags.Usage = c.showUsage
		c.flags.Parse(os.Args[2:])

		if flagCpu < 1 {
			flagCpu = 1
		}
		runtime.GOMAXPROCS(flagCpu)

		if len(flagCpuProfile) > 0 {
			f := util.CreateFile(flagCpuProfile)
			pprof.StartCPUProfile(f)
			defer f.Close()
			defer pprof.StopCPUProfile()
		}

		c.run(c)
		return
	}

	log.Printf("Unknown command '%s'. Run 'flib help' for a list of "+
		"available commands.", cmd)
	os.Exit(1)
}
示例#11
0
文件: main.go 项目: TuftsBCB/tools
// main builds an HHM for the FASTA file given as the first argument and
// writes it to the path given as the second argument.
//
// The HHM is built by hhsuite.BuildHHM against util.FlagSeqDB, using the
// default hhblits configuration and the pseudo-count hhmake configuration.
// Both tools run verbosely unless flagQuiet is set.
func main() {
	src, dst := util.Arg(0), util.Arg(1)

	verbose := !flagQuiet
	blitsConf, makeConf := hhsuite.HHBlitsDefault, hhsuite.HHMakePseudo
	blitsConf.Verbose, makeConf.Verbose = verbose, verbose

	model, err := hhsuite.BuildHHM(blitsConf, makeConf, util.FlagSeqDB, src)
	util.Assert(err, "Error building HHM")

	sink := util.CreateFile(dst)
	util.Assert(hmm.WriteHHM(sink, model), "Error writing HHM '%s'", dst)
}
示例#12
0
// main writes a tab-separated file with the best matching structure
// fragment for each pairwise contiguous set of alpha-carbon atoms of the
// first two proteins in an alignment and a PDB file.
//
// Arguments: the structure fragment library, the alignment, the PDB file
// and the output path. The first two chains of the PDB file are used. A
// row is skipped when both atom sets map to the same fragment, unless
// --all-fragments is set. The last column is the RMSD between the two best
// fragments.
func main() {
	flag.BoolVar(&flagAllFragments, "all-fragments", flagAllFragments,
		"When set, all fragments will be shown, even if the best fragment\n"+
			"of each ATOM set is the same.")
	util.FlagParse(
		"fraglib align.{fasta,ali,a2m,a3m} pdb-file out-csv",
		"Writes a CSV file to out-csv containing the best matching fragment\n"+
			"for each pairwise contiguous set of alpha-carbon atoms of the\n"+
			"first two proteins in the alignment and PDB file.")
	util.AssertNArg(4)
	flib := util.StructureLibrary(util.Arg(0))
	aligned := util.MSA(util.Arg(1))
	pentry := util.PDBRead(util.Arg(2))
	// Both of the first two chains are indexed below; fail with a message
	// rather than an index-out-of-range panic.
	if len(pentry.Chains) < 2 {
		util.Fatalf("PDB file '%s' must contain at least two chains.",
			util.Arg(2))
	}
	outPath := util.Arg(3)
	outcsv := util.CreateFile(outPath)

	csvWriter := csv.NewWriter(outcsv)
	csvWriter.Comma = '\t'

	// Error messages name the output path; the file handle itself does not
	// format usefully with %s.
	pf := func(record ...string) {
		util.Assert(csvWriter.Write(record), "Problem writing to '%s'", outPath)
	}
	pf("start1", "end1", "start2", "end2", "frag1", "frag2", "frag_rmsd")
	iter := newContiguous(
		flib.FragmentSize(),
		aligned.GetFasta(0), aligned.GetFasta(1),
		pentry.Chains[0], pentry.Chains[1])
	for iter.next() {
		best1 := flib.BestStructureFragment(iter.atoms1)
		best2 := flib.BestStructureFragment(iter.atoms2)
		if !flagAllFragments && best1 == best2 {
			continue
		}
		// NOTE(review): if BestStructureFragment can return -1 for "no
		// fragment", flib.Atoms below needs a guard — confirm.
		bestRmsd := structure.RMSD(flib.Atoms(best1), flib.Atoms(best2))
		pf(
			fmt.Sprintf("%d", iter.s1()),
			fmt.Sprintf("%d", iter.e1()),
			fmt.Sprintf("%d", iter.s2()),
			fmt.Sprintf("%d", iter.e2()),
			fmt.Sprintf("%d", best1),
			fmt.Sprintf("%d", best2),
			fmt.Sprintf("%f", bestRmsd),
		)
	}

	// Flush reports nothing itself; a buffered write failure only shows up
	// through Error.
	csvWriter.Flush()
	util.Assert(csvWriter.Error(), "Problem writing to '%s'", outPath)
	util.Assert(outcsv.Close(), "Problem writing to '%s'", outPath)
}
示例#13
0
文件: main.go 项目: TuftsBCB/tools
// main rewrites a FASTA-format (a3m) file in place: every sequence with at
// least one residue is written back with the writer's Columns set to 0, and
// sequences without residues are dropped.
//
// The whole file is read and closed before it is recreated for writing.
func main() {
	target := util.Arg(0)

	in := util.OpenFile(target)
	reader := fasta.NewReader(in)
	reader.TrustSequences = true
	records, err := reader.ReadAll()
	util.Assert(err, "Could not read fasta format '%s'", target)
	util.Assert(in.Close())

	out := util.CreateFile(target)
	writer := fasta.NewWriter(out)
	writer.Columns = 0
	for _, rec := range records {
		if len(rec.Residues) == 0 {
			continue
		}
		util.Assert(writer.Write(rec))
	}
	util.Assert(writer.Flush())
	util.Assert(out.Close())
}
示例#14
0
文件: main.go 项目: TuftsBCB/tools
// main computes the structure bag-of-words of one chain of a PDB entry.
//
// Arguments: the structure fragment library, the chain identifier (only
// its first byte is used), the PDB file and the output path. When the
// output path is "--" the result is printed to stdout; otherwise it is
// written to that file. It is a fatal error if the chain is missing or is
// not a protein.
func main() {
	libPath := util.Arg(0)
	chain := util.Arg(1)
	pdbEntryPath := util.Arg(2)
	bowOut := util.Arg(3)

	// chain[0] is read below; an empty argument would otherwise panic.
	if len(chain) == 0 {
		util.Fatalf("The chain identifier must not be empty.")
	}

	lib := util.StructureLibrary(libPath)
	entry := util.PDBRead(pdbEntryPath)

	thechain := entry.Chain(chain[0])
	if thechain == nil || !thechain.IsProtein() {
		util.Fatalf("Could not find chain with identifier '%c'.", chain[0])
	}

	// Named so that it does not shadow the bow package.
	chainBow := bow.BowerFromChain(thechain).StructureBow(lib)
	if bowOut == "--" {
		fmt.Println(chainBow)
	} else {
		util.BowWrite(util.CreateFile(bowOut), chainBow)
	}
}
示例#15
0
文件: main.go 项目: TuftsBCB/tools
// main splits a FASTA file into one file per sequence.
//
// The first argument is the FASTA input and the second is the output
// directory, which is created if needed. Each sequence is renamed to the
// first whitespace-separated field of its header and written to
// "<name>.fasta" inside the directory.
func main() {
	rfasta := util.OpenFasta(util.Arg(0))
	dir := util.Arg(1)
	util.Assert(os.MkdirAll(dir, 0777))

	fr := fasta.NewReader(rfasta)
	for {
		s, err := fr.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			util.Assert(err)
		}

		// An empty or all-whitespace header has no first field; fail with a
		// message rather than an index-out-of-range panic.
		nameFields := strings.Fields(s.Name)
		if len(nameFields) == 0 {
			util.Fatalf("Found a sequence with an empty name in '%s'.",
				util.Arg(0))
		}
		s.Name = nameFields[0]

		fw := util.CreateFile(path.Join(dir, s.Name+".fasta"))
		w := fasta.NewWriter(fw)
		util.Assert(w.Write(s))
		util.Assert(w.Flush())
		util.Assert(fw.Close())
	}
}
示例#16
0
// mkSeqHMM builds a sequence fragment library of profile HMMs from a
// structure fragment library and a set of PDB entries, and saves it.
//
// It expects at least three arguments: the structure library, the output
// path, and one or more PDB entries. One MSA is kept per structural
// fragment; the chains of every readable PDB entry are passed to
// structureToSequence together with the per-fragment channels. Each MSA is
// then written to a temporary FASTA file and turned into an HMM with
// hhsuite.HHMakePseudo. The new library keeps the structure library's name.
//
// PDB entries that fail to open are reported through the progress tracker
// and skipped.
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA for each structural fragment.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		msa := seq.NewMSA()
		msa.SetLen(structLib.FragmentSize())
		msas = append(msas, msa)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finished reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))
		// Close the alignment before hhmake reads it; otherwise one file
		// descriptor leaks per fragment.
		util.Assert(f.Close())

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // my unifier sucks, i guess
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
示例#17
0
// main computes a p-value for every pair of CATH groups and writes them,
// one pair per line, to the output file.
//
// Arguments: the output path (0), the vectors file (1), and the CATH group
// files (2 and up). For each pair the observed statistic is the delta of
// the two group means; it is compared against 1000 values obtained from
// shuffle_mean_rows, and the p-value is the fraction of those that are at
// least as large as the observed one. The final line reports how many
// p-values fall below 0.05 divided by the number of pairs.
//
// Pairs are processed by util.FlagCpu worker goroutines, so output rows
// are written in completion order. When util.FlagCpuProf is non-empty, a
// CPU profile is written to that path.
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		pprof.StartCPUProfile(f)
		defer f.Close()
		defer pprof.StopCPUProfile()
	}
	vectors := readVectors(util.Arg(1))
	groups := readCathGroups(util.Args()[2:])
	out := util.CreateFile(util.Arg(0))
	defer out.Close()

	// pf writes formatted text to the output file; write errors are ignored.
	pf := func(format string, v ...interface{}) {
		// fmt.Printf(format, v...)
		fmt.Fprintf(out, format, v...)
	}

	type labeledPval struct {
		Name1, Name2 string
		Pval         float32
	}
	b := stdb(vectors, groups)
	pairs := combinations(len(groups))
	dopairs, pvals := make(chan pair), make(chan labeledPval)
	wg := new(sync.WaitGroup)
	// Workers: each takes pairs from dopairs and sends one result per pair
	// on pvals.
	for i := 0; i < util.FlagCpu; i++ {
		wg.Add(1)
		go func() {
			for p := range dopairs {
				g1, g2 := groups[p.i], groups[p.j]
				b1, b2 := b[p.i], b[p.j]
				bm1, bm2 := bmean(b1, b2)
				bw := delta(bm1, bm2)

				randws := make([]float32, 1000)
				for i := range randws {
					randws[i] = delta(shuffle_mean_rows(b1, b2))
				}

				// Count the shuffled statistics that are at least as large
				// as the observed one.
				bigger := 0
				for _, rw := range randws {
					if rw >= bw {
						bigger++
					}
				}
				pval := float32(bigger) / float32(len(randws))
				pvals <- labeledPval{g1.Name, g2.Name, pval}
			}
			wg.Done()
		}()
	}

	// Single writer: drains pvals so that only one goroutine writes to the
	// output, then signals completion on done.
	done := make(chan struct{})
	go func() {
		sig, cutoff := 0, 0.05/float32(len(pairs))
		for pval := range pvals {
			if pval.Pval < cutoff {
				sig++
			}
			pf("%s\t%s\t%f\n", pval.Name1, pval.Name2, pval.Pval)
		}
		pf("significant\t%d/%d\t(cutoff: %f)\n", sig, len(pairs), cutoff)
		done <- struct{}{}
	}()
	for _, p := range pairs {
		dopairs <- p
	}
	// Shutdown order matters: stop feeding the workers, wait for them to
	// finish sending, and only then close pvals so the writer can exit.
	close(dopairs)
	wg.Wait()
	close(pvals)
	<-done
}
示例#18
0
// main computes the AUC of aligner matrices and BOW databases with respect
// to a best-of-all matrix and writes each value to its own file.
//
// Arguments: the CATH domain list (0), the best-of-all matrix (1), and
// then pairs of (input, output path). An input whose extension is ".bowdb"
// is opened as a BOW database; any other input is read as an aligner
// matrix. Each output file receives a single line with the AUC formatted
// with "%f". When util.FlagCpuProf is non-empty, a CPU profile is written
// to that path.
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		pprof.StartCPUProfile(f)
		defer f.Close()
		defer pprof.StopCPUProfile()
	}

	// Read all CATH domains, the best-of-all matrix, and the matrix for
	// each aligner.
	domains := readDomains(util.Arg(0))
	boa := readMatrix(domains, util.Arg(1))
	aligners := make([]aligner, 0)
	flibs := make([]flib, 0)
	for i := 2; i < util.NArg(); i += 2 {
		fpath := util.Arg(i)
		if path.Ext(fpath) == ".bowdb" {
			db := util.OpenBowDB(fpath)
			records, err := db.ReadAll()
			util.Assert(err)

			// Index the database records by their position in the domain
			// list.
			bowed := make([]bow.Bowed, domains.in.Len())
			for _, b := range records {
				if !domains.in.Exists(b.Id) {
					util.Fatalf("Found ID in bowdb that isn't in the list "+
						"of CATH domains provided: %s", b.Id)
				}
				bowed[domains.in.Atom(b.Id)] = b
			}
			flibs = append(flibs, flib{db, bowed, util.Arg(i + 1)})
		} else {
			aligners = append(aligners, aligner{
				readMatrix(domains, fpath),
				util.Arg(i + 1),
			})
		}
	}
	// Now remove CATH domains that don't have a corresponding structure file.
	// We don't do this initially since the matrix files are indexed with
	// respect to all CATH domains (including ones without structure).
	// This is an artifact of the fact that the matrices were generated with
	// a very old version of CATH.
	domains.removeOldDomains()

	// Sanity check: the best-of-all matrix scored against itself must give
	// an AUC of exactly 1.0.
	if a := matrixAuc(domains, boa, boa, flagThreshold); a != 1.0 {
		util.Fatalf("Something is wrong. The AUC of the best-of-all matrix "+
			"with respect to itself is %f, but it should be 1.0.", a)
	}

	if len(aligners) > 0 {
		fmt.Println("Computing AUC for aligners...")
		writeAuc := func(aligner aligner) struct{} {
			w := util.CreateFile(aligner.outpath)
			a := matrixAuc(domains, boa, aligner.matrix, flagThreshold)
			// Report write and close failures instead of silently leaving
			// an empty or truncated result file.
			_, err := fmt.Fprintf(w, "%f\n", a)
			util.Assert(err, "Could not write AUC to '%s'", aligner.outpath)
			util.Assert(w.Close(), "Could not close '%s'", aligner.outpath)
			return struct{}{}
		}
		fun.ParMap(writeAuc, aligners)
	}
	if len(flibs) > 0 {
		fmt.Println("Computing AUC for bowdbs...")
		writeAuc := func(flib flib) struct{} {
			w := util.CreateFile(flib.outpath)
			a := flibAuc(domains, boa, flib, flagThreshold)
			_, err := fmt.Fprintf(w, "%f\n", a)
			util.Assert(err, "Could not write AUC to '%s'", flib.outpath)
			util.Assert(w.Close(), "Could not close '%s'", flib.outpath)
			return struct{}{}
		}
		fun.ParMap(writeAuc, flibs)
	}
}
示例#19
0
文件: main.go 项目: TuftsBCB/tools
// main reads a PDB file and writes the sequences of its usable chains in
// FASTA format.
//
// The input path is flag.Arg(0). Unless flagSeparateChains is set, the
// sequences of all usable chains (per isChainUsable) are concatenated into
// a single entry, named with the chain header when the file has exactly
// one chain and with the lower-cased PDB ID code otherwise. With
// flagSeparateChains, every usable chain becomes its own entry.
//
// Output goes to stdout when exactly one positional argument is given, and
// otherwise to the file named by the second argument. When flagSplit is
// non-empty it names a directory and every entry is written to its own
// "<name>.fasta" file there instead; combining it with an output file is a
// fatal error.
func main() {
	pdbEntry := util.PDBRead(flag.Arg(0))

	fasEntries := make([]seq.Sequence, 0, 5)
	if !flagSeparateChains {
		var fasEntry seq.Sequence
		if len(pdbEntry.Chains) == 1 {
			fasEntry.Name = chainHeader(pdbEntry.OneChain())
		} else {
			fasEntry.Name = strings.ToLower(pdbEntry.IdCode)
		}

		// Named so that it does not shadow the seq package.
		residues := make([]seq.Residue, 0, 100)
		for _, chain := range pdbEntry.Chains {
			if isChainUsable(chain) {
				residues = append(residues, chain.Sequence...)
			}
		}
		fasEntry.Residues = residues

		if len(fasEntry.Residues) == 0 {
			util.Fatalf("Could not find any amino acids.")
		}
		fasEntries = append(fasEntries, fasEntry)
	} else {
		for _, chain := range pdbEntry.Chains {
			if !isChainUsable(chain) {
				continue
			}

			fasEntry := seq.Sequence{
				Name:     chainHeader(chain),
				Residues: chain.Sequence,
			}
			fasEntries = append(fasEntries, fasEntry)
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	// outName is what error messages show; the writer itself has no useful
	// string form.
	var fasOut io.Writer
	outName := "stdout"
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		outName = util.Arg(1)
		fasOut = util.CreateFile(outName)
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", outName)
	} else {
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
			// Close per entry so descriptors do not pile up across entries.
			util.Assert(out.Close(), "Could not write to '%s'", fp)
		}
	}
}
示例#20
0
// mkSeqProfile builds a sequence fragment library of profiles from a
// structure fragment library and a set of PDB entries, and saves it.
//
// It expects at least three arguments: the structure library, the output
// path, and one or more PDB entries. One frequency profile is kept per
// structural fragment, plus a null profile obtained from addToNull. The
// chains of every readable PDB entry are passed to structureToSequence
// together with the null channel and the per-fragment channels. Each
// frequency profile is finally converted to a profile against the null
// profile, and the new library keeps the structure library's name.
//
// PDB entries that fail to open are reported through the progress tracker
// and skipped.
func mkSeqProfile(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Initialize a frequency and null profile for each structural fragment.
	var freqProfiles []*seq.FrequencyProfile
	var fpChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		fp := seq.NewFrequencyProfile(structLib.FragmentSize())
		freqProfiles = append(freqProfiles, fp)
		fpChans = append(fpChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	nullChan, nullProfile := addToNull()
	for i := 0; i < structLib.Size(); i++ {
		addToProfile(fpChans[i], freqProfiles[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	// Start flagCpu workers that read PDB entries from entryChan.
	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nullChan, fpChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finished reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	close(nullChan)
	for i := 0; i < structLib.Size(); i++ {
		close(fpChans[i])
	}
	wgSeqFragments.Wait()

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	profs := make([]*seq.Profile, structLib.Size())
	for i := 0; i < structLib.Size(); i++ {
		profs[i] = freqProfiles[i].Profile(nullProfile)
	}
	lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
示例#21
0
文件: main.go 项目: TuftsBCB/tools
// main converts a fragment map into a structure bag-of-words.
//
// Arguments: the structure fragment library, the fragment map file and the
// output path.
func main() {
	library := util.StructureLibrary(util.Arg(0))
	fragMap := util.FmapRead(util.Arg(1))

	// The output file is created before the bag-of-words is computed.
	sink := util.CreateFile(util.Arg(2))
	util.BowWrite(sink, fragMap.StructureBow(library))
}
示例#22
-1
文件: main.go 项目: TuftsBCB/tools
// main computes a fragment map for every FASTA file given and writes each
// one to "<map name>.fmap" inside the output directory.
//
// The first argument is the output directory (created if needed); the
// remaining arguments are the FASTA inputs. The inputs are distributed
// over a pool of worker goroutines sized by GOMAXPROCS (at least one).
func main() {
	outDir := util.Arg(0)
	fasInps := util.Args()[1:]

	util.Assert(os.MkdirAll(outDir, 0777))

	fastaChan := make(chan string)
	wg := new(sync.WaitGroup)
	for i := 0; i < max(1, runtime.GOMAXPROCS(0)); i++ {
		// Add must happen before the goroutine starts. Calling it inside
		// the goroutine races with wg.Wait below, which could return before
		// a worker has registered itself.
		wg.Add(1)
		go func() {
			defer wg.Done()
			for fasta := range fastaChan {
				util.Verbosef("Computing map for '%s'...", fasta)
				fmap := util.GetFmap(fasta)
				outF := path.Join(outDir, fmt.Sprintf("%s.fmap", fmap.Name))
				util.FmapWrite(util.CreateFile(outF), fmap)
			}
		}()
	}

	for _, fasta := range fasInps {
		fastaChan <- fasta
	}

	// Closing the channel ends each worker's range loop; then wait for the
	// maps still in flight.
	close(fastaChan)
	wg.Wait()
}