Ejemplo n.º 1
0
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		pprof.StartCPUProfile(f)
		defer f.Close()
		defer pprof.StopCPUProfile()
	}
	if len(flagGobIt) > 0 {
		astralDir := util.Arg(0)
		dists := readAlignmentDists(astralDir)
		enc := gob.NewEncoder(util.CreateFile(flagGobIt))
		util.Assert(enc.Encode(dists), "Could not GOB encode distances")
		return
	}

	var dists *intern.Table
	if util.IsDir(util.Arg(0)) {
		dists = readAlignmentDists(util.Arg(0))
	} else {
		dec := gob.NewDecoder(util.OpenFile(util.Arg(0)))
		util.Assert(dec.Decode(&dists), "Could not GOB decode distances")
	}

	treeFile := util.Arg(1)
	outPath := util.Arg(2)

	treeReader := newick.NewReader(util.OpenFile(treeFile))
	tree, err := treeReader.ReadTree()
	util.Assert(err, "Could not read newick tree")

	csvw := csv.NewWriter(util.CreateFile(outPath))
	clusters := treeClusters(flagThreshold, dists, tree)
	util.Assert(csvw.WriteAll(clusters))
}
Ejemplo n.º 2
0
func main() {
	var f io.Reader
	var err error

	f = util.OpenFile(flag.Arg(0))
	if strings.HasSuffix(flag.Arg(0), ".gz") {
		f, err = gzip.NewReader(f)
		util.Assert(err)
	}
	cifEntry, err := pdbx.Read(f)
	util.Assert(err, "Could not read PDBx/mmCIF file")

	fasEntries := make([]seq.Sequence, 0, 5)
	for _, ent := range cifEntry.Entities {
		for _, chain := range ent.Chains {
			if !isChainUsable(chain) || len(ent.Seq) == 0 {
				continue
			}

			fasEntry := seq.Sequence{
				Name:     chainHeader(chain),
				Residues: ent.Seq,
			}
			fasEntries = append(fasEntries, fasEntry)
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	var fasOut io.Writer
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		fasOut = util.CreateFile(util.Arg(1))
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", fasOut)
	} else {
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
		}
	}
}
Ejemplo n.º 3
0
func main() {
	hhmFile := util.Arg(0)
	start := util.ParseInt(util.Arg(1))
	end := util.ParseInt(util.Arg(2))

	fhhm := util.OpenFile(hhmFile)

	qhhm, err := hmm.ReadHHM(fhhm)
	util.Assert(err)

	util.Assert(hmm.WriteHHM(os.Stdout, qhhm.Slice(start, end)))
}
Ejemplo n.º 4
0
func main() {
	pdbs := util.OpenFile(flag.Arg(0))
	defer pdbs.Close()

	entries, err := slct.NewReader(pdbs).ReadAll()
	util.Assert(err)

	for _, entry := range entries {
		if flagPaths {
			fmt.Println(util.PDBPath(entry.ChainID))
		} else {
			fmt.Println(entry.ChainID)
		}
	}
}
Ejemplo n.º 5
0
func main() {
	a3mPath := util.Arg(0)
	fa3m := util.OpenFile(a3mPath)

	freader := fasta.NewReader(fa3m)
	freader.TrustSequences = true
	seqs, err := freader.ReadAll()
	util.Assert(err, "Could not read fasta format '%s'", a3mPath)
	util.Assert(fa3m.Close())

	w := util.CreateFile(a3mPath)
	fwriter := fasta.NewWriter(w)
	fwriter.Columns = 0
	for _, seq := range seqs {
		if len(seq.Residues) > 0 {
			util.Assert(fwriter.Write(seq))
		}
	}
	util.Assert(fwriter.Flush())
	util.Assert(w.Close())
}
Ejemplo n.º 6
0
func readAlignmentDists(dir string) *intern.Table {
	dists := intern.NewTable(11000)
	threads := util.FlagCpu
	addDists := make(chan []pair)
	alignFile := make(chan string)
	done := make(chan struct{})

	go func() {
		for fileDists := range addDists {
			for _, pair := range fileDists {
				a1, a2 := dists.Atom(pair.key[0]), dists.Atom(pair.key[1])
				dists.Set(a1, a2, pair.dist)
			}
		}
		done <- struct{}{}
	}()

	wg := new(sync.WaitGroup)
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			for fpath := range alignFile {
				log.Printf("Reading %s (%s)", fpath, time.Now())

				f := util.OpenFile(fpath)
				defer f.Close()

				csvr := csv.NewReader(f)
				csvr.Comma = '\t'
				csvr.TrimLeadingSpace = true
				csvr.FieldsPerRecord = -1 // data is poorly formatted

				records, err := csvr.ReadAll()
				util.Assert(err, "[%s]", fpath)

				fileDists := make([]pair, 0, 100000)
				for _, record := range records {
					if len(record) != 9 {
						continue
					}
					p := recordToDist(record)
					fileDists = append(fileDists, p)
				}
				addDists <- fileDists
			}
			wg.Done()
		}()
	}

	for _, fpath := range util.RecursiveFiles(dir) {
		if strings.HasPrefix(path.Base(fpath), ".") {
			continue
		}
		alignFile <- fpath
	}
	close(alignFile)
	wg.Wait()
	close(addDists)
	<-done
	return dists
}