// main clusters domains in a newick tree using pairwise alignment
// distances. Args: 0 = distance source (directory of alignments, or a
// GOB file), 1 = newick tree file, 2 = output CSV path.
func main() {
	// Optional CPU profiling. Defers run LIFO, so StopCPUProfile fires
	// before the profile file is closed — the required order.
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		pprof.StartCPUProfile(f)
		defer f.Close()
		defer pprof.StopCPUProfile()
	}
	// With --gob-it set, only read the distances and serialize them to a
	// GOB file for faster loading later; no clustering is performed.
	if len(flagGobIt) > 0 {
		astralDir := util.Arg(0)
		dists := readAlignmentDists(astralDir)
		enc := gob.NewEncoder(util.CreateFile(flagGobIt))
		util.Assert(enc.Encode(dists), "Could not GOB encode distances")
		return
	}
	// Arg 0 is either a directory of alignment files, or a GOB file
	// previously produced by the --gob-it branch above.
	var dists *intern.Table
	if util.IsDir(util.Arg(0)) {
		dists = readAlignmentDists(util.Arg(0))
	} else {
		dec := gob.NewDecoder(util.OpenFile(util.Arg(0)))
		util.Assert(dec.Decode(&dists), "Could not GOB decode distances")
	}
	treeFile := util.Arg(1)
	outPath := util.Arg(2)
	treeReader := newick.NewReader(util.OpenFile(treeFile))
	tree, err := treeReader.ReadTree()
	util.Assert(err, "Could not read newick tree")
	// csv.WriteAll flushes the writer and returns any write error.
	csvw := csv.NewWriter(util.CreateFile(outPath))
	clusters := treeClusters(flagThreshold, dists, tree)
	util.Assert(csvw.WriteAll(clusters))
}
func mkStructure(c *command) { c.assertNArg(2) brkFile := c.flags.Arg(0) saveto := c.flags.Arg(1) util.AssertOverwritable(saveto, flagOverwrite) brkContents, err := ioutil.ReadAll(util.OpenFile(c.flags.Arg(0))) util.Assert(err) pdbFragments := bytes.Split(brkContents, []byte("TER")) fragments := make([][]structure.Coords, 0) for i, pdbFrag := range pdbFragments { pdbFrag = bytes.TrimSpace(pdbFrag) if len(pdbFrag) == 0 { continue } fragments = append(fragments, coords(i, pdbFrag)) } libName := stripExt(path.Base(brkFile)) lib, err := fragbag.NewStructureAtoms(libName, fragments) util.Assert(err) fragbag.Save(util.CreateFile(saveto), lib) }
func main() { var f io.Reader var err error f = util.OpenFile(flag.Arg(0)) if strings.HasSuffix(flag.Arg(0), ".gz") { f, err = gzip.NewReader(f) util.Assert(err) } cifEntry, err := pdbx.Read(f) util.Assert(err, "Could not read PDBx/mmCIF file") fasEntries := make([]seq.Sequence, 0, 5) for _, ent := range cifEntry.Entities { for _, chain := range ent.Chains { if !isChainUsable(chain) || len(ent.Seq) == 0 { continue } fasEntry := seq.Sequence{ Name: chainHeader(chain), Residues: ent.Seq, } fasEntries = append(fasEntries, fasEntry) } } if len(fasEntries) == 0 { util.Fatalf("Could not find any chains with amino acids.") } var fasOut io.Writer if flag.NArg() == 1 { fasOut = os.Stdout } else { if len(flagSplit) > 0 { util.Fatalf("The '--split' option is incompatible with a single " + "output file.") } fasOut = util.CreateFile(util.Arg(1)) } if len(flagSplit) == 0 { util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries), "Could not write FASTA file '%s'", fasOut) } else { for _, entry := range fasEntries { fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name)) out := util.CreateFile(fp) w := fasta.NewWriter(out) util.Assert(w.Write(entry), "Could not write to '%s'", fp) util.Assert(w.Flush(), "Could not write to '%s'", fp) } } }
// main merges several whitespace-separated numeric matrices (files given
// as args 1..N) element-wise by minimum and writes the result to the
// file named by arg 0. Rows are read in lockstep, one line per file per
// iteration; the matrices are assumed to have identical dimensions.
func main() {
	saveto := util.CreateFile(util.Arg(0))
	defer saveto.Close()

	// w writes formatted output to saveto, aborting on any write error.
	w := func(format string, v ...interface{}) {
		_, err := fmt.Fprintf(saveto, format, v...)
		util.Assert(err)
	}

	var fmats []*bufio.Reader
	for _, fmat := range util.Args()[1:] {
		fmats = append(fmats, bufio.NewReader(util.OpenFile(fmat)))
	}
LOOP:
	for {
		var columns int
		scores := make([][]float64, len(fmats)) // matrix -> fields -> sas score
		for i, fmat := range fmats {
			line, err := fmat.ReadBytes('\n')
			// A clean EOF (no trailing data) ends the merge; EOF on a
			// final unterminated line still gets parsed below.
			if len(line) == 0 && err == io.EOF {
				break LOOP
			} else if err != io.EOF {
				util.Assert(err)
			}
			fields := bytes.Fields(line)
			columns = len(fields)
			scores[i] = make([]float64, columns)
			for j, sas := range fields {
				scores[i][j], err = strconv.ParseFloat(string(sas), 64)
				util.Assert(err)
			}
		}
		// Emit the column-wise minimum across all matrices for this row.
		before := ""
		for j := 0; j < columns; j++ {
			best := scores[0][j]
			for i := 1; i < len(scores); i++ {
				if scores[i][j] < best {
					best = scores[i][j]
				}
			}
			// Exact zeros are printed compactly as "0", not "0.000000".
			if best == 0 {
				w("%s0", before)
			} else {
				w("%s%f", before, best)
			}
			before = " "
		}
		w("\n")
	}
}
func main() { in, out := util.Arg(0), util.Arg(1) r, w := ioFromFile(in, flagInFmt).r, ioFromFile(out, flagOutFmt).w inf := util.OpenFile(in) defer inf.Close() msa, err := r(inf) util.Assert(err, "Error parsing '%s'", in) outf := util.CreateFile(out) defer outf.Close() util.Assert(w(outf, msa), "Error writing '%s'", out) }
func main() { pdbs := util.OpenFile(flag.Arg(0)) defer pdbs.Close() entries, err := slct.NewReader(pdbs).ReadAll() util.Assert(err) for _, entry := range entries { if flagPaths { fmt.Println(util.PDBPath(entry.ChainID)) } else { fmt.Println(entry.ChainID) } } }
func readVectors(fpath string) map[string]bow.Bow { f := util.OpenFile(fpath) defer f.Close() bows := make(map[string]bow.Bow, 5000) for _, line := range util.ReadLines(f) { fields := strings.Fields(line) b := bow.NewBow(len(fields[1:])) for _, sfreq := range fields[1:] { freq, err := strconv.ParseFloat(sfreq, 32) util.Assert(err) b.Freqs = append(b.Freqs, float32(freq)) } bows[fields[0]] = b } return bows }
func readDomains(fpath string) *inDomains { domains := &inDomains{ intern.NewInterner(), make([]string, 0, 2000), make([]intern.Atom, 0, 2000), } scanner := bufio.NewScanner(util.OpenFile(fpath)) for scanner.Scan() { d := strings.Fields(scanner.Text())[0] d = stripExt(path.Base(util.CathPath(d))) a := domains.in.Atom(d) domains.ids = append(domains.ids, d) domains.atoms = append(domains.atoms, a) } util.Assert(scanner.Err()) return domains }
func main() { a3mPath := util.Arg(0) fa3m := util.OpenFile(a3mPath) freader := fasta.NewReader(fa3m) freader.TrustSequences = true seqs, err := freader.ReadAll() util.Assert(err, "Could not read fasta format '%s'", a3mPath) util.Assert(fa3m.Close()) w := util.CreateFile(a3mPath) fwriter := fasta.NewWriter(w) fwriter.Columns = 0 for _, seq := range seqs { if len(seq.Residues) > 0 { util.Assert(fwriter.Write(seq)) } } util.Assert(fwriter.Flush()) util.Assert(w.Close()) }
// readMatrix reads a space-separated numeric matrix from fpath into an
// interned table. Row/column index i corresponds to domains.atoms[i];
// only the upper triangle (j > i) is stored — the lower triangle and the
// diagonal are skipped.
func readMatrix(domains *inDomains, fpath string) *intern.Table {
	var (
		err  error
		fval float64
		sval string
	)
	tab := intern.NewTableInterner(domains.in)
	scanner := bufio.NewScanner(util.OpenFile(fpath))
	for i := 0; scanner.Scan(); i++ {
		// It'd be much simpler to use Split here, but let's be quicker.
		// In particular, avoid allocating.
		// Also, we're dealing with the line as a string since it's quicker
		// than using bytes and converting each number to a string for
		// strconv.ParseFloat.
		line := scanner.Text()
		// bstart marks the start of the current token; j is the column
		// index of the token just completed (-1 before the first one).
		bstart, j := 0, -1
		for bend, b := range scanner.Text() {
			// This actually skips the very last element in the table, but
			// it's OK because the value at [k, k] is always 0.
			switch {
			case b == ' ' || b == '\n' || bend+1 == len(line):
				sval = line[bstart:bend]
				bstart = bend + 1
				j++
				// falls down to process this value
			default:
				continue
			}
			// Store only the upper triangle; empty tokens (runs of
			// delimiters) are ignored.
			if j > i && len(sval) > 0 { // upper triangular
				fval, err = strconv.ParseFloat(sval, 64)
				if err != nil {
					panic(err)
				}
				tab.Set(domains.atoms[i], domains.atoms[j], fval)
			}
		}
	}
	util.Assert(scanner.Err())
	return tab
}
func readAlignmentDists(dir string) *intern.Table { dists := intern.NewTable(11000) threads := util.FlagCpu addDists := make(chan []pair) alignFile := make(chan string) done := make(chan struct{}) go func() { for fileDists := range addDists { for _, pair := range fileDists { a1, a2 := dists.Atom(pair.key[0]), dists.Atom(pair.key[1]) dists.Set(a1, a2, pair.dist) } } done <- struct{}{} }() wg := new(sync.WaitGroup) for i := 0; i < threads; i++ { wg.Add(1) go func() { for fpath := range alignFile { log.Printf("Reading %s (%s)", fpath, time.Now()) f := util.OpenFile(fpath) defer f.Close() csvr := csv.NewReader(f) csvr.Comma = '\t' csvr.TrimLeadingSpace = true csvr.FieldsPerRecord = -1 // data is poorly formatted records, err := csvr.ReadAll() util.Assert(err, "[%s]", fpath) fileDists := make([]pair, 0, 100000) for _, record := range records { if len(record) != 9 { continue } p := recordToDist(record) fileDists = append(fileDists, p) } addDists <- fileDists } wg.Done() }() } for _, fpath := range util.RecursiveFiles(dir) { if strings.HasPrefix(path.Base(fpath), ".") { continue } alignFile <- fpath } close(alignFile) wg.Wait() close(addDists) <-done return dists }