Exemplo n.º 1
0
// main clusters a newick tree using pairwise distances and writes the
// clusters as CSV.
//
// Positional arguments:
//
//	0: a directory of alignment files (read with readAlignmentDists) or,
//	   when it is not a directory, a GOB file holding the distance table
//	1: the newick tree file
//	2: the CSV output path
//
// When flagGobIt is set, the distances are read from the directory given as
// argument 0, GOB encoded into the file named by flagGobIt, and the program
// returns without clustering.
//
// NOTE(review): this assumes util.Assert aborts the program when its first
// argument is a non-nil error — confirm against the util package.
func main() {
	if len(util.FlagCpuProf) > 0 {
		f := util.CreateFile(util.FlagCpuProf)
		util.Assert(pprof.StartCPUProfile(f), "Could not start CPU profile")
		// Deferred calls run last-in-first-out, so the profile is stopped
		// before its file is closed.
		defer f.Close()
		defer pprof.StopCPUProfile()
	}
	if len(flagGobIt) > 0 {
		astralDir := util.Arg(0)
		dists := readAlignmentDists(astralDir)

		gobOut := util.CreateFile(flagGobIt)
		enc := gob.NewEncoder(gobOut)
		util.Assert(enc.Encode(dists), "Could not GOB encode distances")
		// A failed close on a written file can mean lost data, so check it.
		util.Assert(gobOut.Close(), "Could not close '%s'", flagGobIt)
		return
	}

	var dists *intern.Table
	if util.IsDir(util.Arg(0)) {
		dists = readAlignmentDists(util.Arg(0))
	} else {
		gobIn := util.OpenFile(util.Arg(0))
		dec := gob.NewDecoder(gobIn)
		util.Assert(dec.Decode(&dists), "Could not GOB decode distances")
		util.Assert(gobIn.Close())
	}

	treeFile := util.Arg(1)
	outPath := util.Arg(2)

	treeIn := util.OpenFile(treeFile)
	tree, err := newick.NewReader(treeIn).ReadTree()
	util.Assert(err, "Could not read newick tree")
	util.Assert(treeIn.Close())

	// The output file is created before clustering, as in the original flow,
	// so an unwritable output path is reported before the expensive step.
	out := util.CreateFile(outPath)
	clusters := treeClusters(flagThreshold, dists, tree)
	// csv.Writer.WriteAll flushes the writer before returning.
	util.Assert(csv.NewWriter(out).WriteAll(clusters))
	util.Assert(out.Close(), "Could not close '%s'", outPath)
}
Exemplo n.º 2
0
// mkStructure builds a structure fragment library from the file named by the
// first command argument and saves it to the path named by the second.
//
// The input is split on the byte sequence "TER"; every non-empty piece is
// converted with coords and becomes one fragment. The library is named after
// the input file's base name with its extension stripped.
func mkStructure(c *command) {
	c.assertNArg(2)

	brkFile := c.flags.Arg(0)
	saveto := c.flags.Arg(1)

	util.AssertOverwritable(saveto, flagOverwrite)

	in := util.OpenFile(brkFile)
	brkContents, err := ioutil.ReadAll(in)
	util.Assert(err)
	util.Assert(in.Close())

	pdbFragments := bytes.Split(brkContents, []byte("TER"))
	fragments := make([][]structure.Coords, 0, len(pdbFragments))
	for i, pdbFrag := range pdbFragments {
		pdbFrag = bytes.TrimSpace(pdbFrag)
		if len(pdbFrag) == 0 {
			continue
		}
		// i is the index among all split pieces, including skipped empty
		// ones, exactly as coords received it before.
		fragments = append(fragments, coords(i, pdbFrag))
	}

	libName := stripExt(path.Base(brkFile))
	lib, err := fragbag.NewStructureAtoms(libName, fragments)
	util.Assert(err)

	out := util.CreateFile(saveto)
	// NOTE(review): if fragbag.Save returns an error it should be checked
	// here — confirm its signature.
	fragbag.Save(out, lib)
	util.Assert(out.Close(), "Could not close '%s'", saveto)
}
Exemplo n.º 3
0
// main reads a PDBx/mmCIF file (gzip-compressed when its name ends in ".gz")
// and writes one FASTA entry per usable chain whose entity has a sequence.
//
// Output destination:
//   - flagSplit set: one "<name>.fasta" file per entry inside the flagSplit
//     directory; giving an output file argument as well is an error.
//   - exactly one argument: standard output.
//   - otherwise: the file named by the second argument.
//
// NOTE(review): this assumes util.Assert and util.Fatalf abort the program —
// confirm against the util package.
func main() {
	inPath := flag.Arg(0)
	inFile := util.OpenFile(inPath)
	defer inFile.Close()

	var f io.Reader = inFile
	if strings.HasSuffix(inPath, ".gz") {
		gz, err := gzip.NewReader(inFile)
		util.Assert(err)
		defer gz.Close()
		f = gz
	}
	cifEntry, err := pdbx.Read(f)
	util.Assert(err, "Could not read PDBx/mmCIF file")

	fasEntries := make([]seq.Sequence, 0, 5)
	for _, ent := range cifEntry.Entities {
		for _, chain := range ent.Chains {
			if !isChainUsable(chain) || len(ent.Seq) == 0 {
				continue
			}

			fasEntry := seq.Sequence{
				Name:     chainHeader(chain),
				Residues: ent.Seq,
			}
			fasEntries = append(fasEntries, fasEntry)
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	if len(flagSplit) > 0 {
		if flag.NArg() != 1 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
			// Close each file right away so a large number of chains does
			// not exhaust the process's file descriptors.
			util.Assert(out.Close(), "Could not write to '%s'", fp)
		}
		return
	}

	if flag.NArg() == 1 {
		util.Assert(fasta.NewWriter(os.Stdout).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", "stdout")
		return
	}

	outPath := util.Arg(1)
	out := util.CreateFile(outPath)
	// Report the output path, not the writer value, in the error message.
	util.Assert(fasta.NewWriter(out).WriteAll(fasEntries),
		"Could not write FASTA file '%s'", outPath)
	util.Assert(out.Close(), "Could not write FASTA file '%s'", outPath)
}
Exemplo n.º 4
0
// main merges several whitespace-separated numeric matrices into one by
// taking, for every cell, the smallest value found at that position across
// all input matrices.
//
// Positional arguments: the output path, followed by one or more matrix
// files. The matrices are read in lock step, one line at a time; processing
// stops as soon as any of them reaches end of file. A result of exactly zero
// is written as "0", anything else with the "%f" format, and cells are
// separated by single spaces.
//
// NOTE(review): this assumes util.Assert and util.Fatalf abort the program —
// confirm against the util package.
func main() {
	saveto := util.CreateFile(util.Arg(0))
	defer saveto.Close()

	// Buffer the output: a write is issued for every cell.
	out := bufio.NewWriter(saveto)
	w := func(format string, v ...interface{}) {
		_, err := fmt.Fprintf(out, format, v...)
		util.Assert(err)
	}

	matPaths := util.Args()[1:]
	if len(matPaths) == 0 {
		// Without any input the read loop below could never reach EOF and
		// would emit empty lines forever.
		util.Fatalf("At least one matrix file is required.")
	}

	fmats := make([]*bufio.Reader, 0, len(matPaths))
	for _, fmat := range matPaths {
		f := util.OpenFile(fmat)
		// All inputs are read in parallel, so they must stay open until
		// main returns; the defer in this loop is intentional.
		defer f.Close()
		fmats = append(fmats, bufio.NewReader(f))
	}

LOOP:
	for {
		var columns int
		scores := make([][]float64, len(fmats)) // matrix -> fields -> sas score
		for i, fmat := range fmats {
			line, err := fmat.ReadBytes('\n')
			if len(line) == 0 && err == io.EOF {
				break LOOP
			} else if err != io.EOF {
				util.Assert(err)
			}

			fields := bytes.Fields(line)
			if i > 0 && len(fields) != columns {
				// Every matrix must agree on the row width, otherwise the
				// per-column comparison below would index out of range.
				util.Fatalf("Matrix '%s' has %d columns in a row where "+
					"'%s' has %d.", matPaths[i], len(fields),
					matPaths[0], columns)
			}
			columns = len(fields)
			scores[i] = make([]float64, columns)
			for j, sas := range fields {
				scores[i][j], err = strconv.ParseFloat(string(sas), 64)
				util.Assert(err)
			}
		}

		before := ""
		for j := 0; j < columns; j++ {
			best := scores[0][j]
			for i := 1; i < len(scores); i++ {
				if scores[i][j] < best {
					best = scores[i][j]
				}
			}
			if best == 0 {
				w("%s0", before)
			} else {
				w("%s%f", before, best)
			}
			before = " "
		}
		w("\n")
	}

	util.Assert(out.Flush())
}
Exemplo n.º 5
0
// main converts a multiple sequence alignment from one format to another.
//
// The first argument is the input path and the second the output path. The
// reader and writer functions are chosen by ioFromFile from each path
// together with flagInFmt and flagOutFmt respectively. The output file is
// only created after the input has been parsed successfully.
func main() {
	in, out := util.Arg(0), util.Arg(1)
	r, w := ioFromFile(in, flagInFmt).r, ioFromFile(out, flagOutFmt).w
	inf := util.OpenFile(in)
	defer inf.Close()

	msa, err := r(inf)
	util.Assert(err, "Error parsing '%s'", in)

	outf := util.CreateFile(out)
	util.Assert(w(outf, msa), "Error writing '%s'", out)
	// Close explicitly rather than with defer so that a failure to finish
	// writing the output is reported instead of silently dropped.
	util.Assert(outf.Close(), "Error writing '%s'", out)
}
Exemplo n.º 6
0
func main() {
	pdbs := util.OpenFile(flag.Arg(0))
	defer pdbs.Close()

	entries, err := slct.NewReader(pdbs).ReadAll()
	util.Assert(err)

	for _, entry := range entries {
		if flagPaths {
			fmt.Println(util.PDBPath(entry.ChainID))
		} else {
			fmt.Println(entry.ChainID)
		}
	}
}
Exemplo n.º 7
0
// readVectors reads bag-of-words vectors from the file at fpath.
//
// Each non-blank line holds a name followed by whitespace-separated
// frequencies; the frequencies are parsed as 32-bit floats. The result maps
// each name to its vector. Blank lines are skipped, and a later line with
// the same name replaces an earlier one.
func readVectors(fpath string) map[string]bow.Bow {
	f := util.OpenFile(fpath)
	defer f.Close()

	bows := make(map[string]bow.Bow, 5000)
	for _, line := range util.ReadLines(f) {
		fields := strings.Fields(line)
		if len(fields) == 0 {
			// A blank line has no name to index; skip it instead of
			// panicking on fields[0].
			continue
		}
		name, sfreqs := fields[0], fields[1:]

		b := bow.NewBow(len(sfreqs))
		// NOTE(review): if bow.NewBow returns Freqs already holding
		// len(sfreqs) zero values, appending to it would yield a vector
		// twice as long as the input. Truncating to length zero first keeps
		// any allocated capacity and is correct either way — confirm
		// against bow.NewBow.
		b.Freqs = b.Freqs[:0]
		for _, sfreq := range sfreqs {
			freq, err := strconv.ParseFloat(sfreq, 32)
			util.Assert(err)
			b.Freqs = append(b.Freqs, float32(freq))
		}
		bows[name] = b
	}
	return bows
}
Exemplo n.º 8
0
// readDomains reads a list of domains from the file at fpath, one per line.
//
// Only the first whitespace-separated field of each line is used. It is
// passed through util.CathPath, reduced to its base name without extension,
// and interned. The returned value holds the interner together with the
// identifiers and their atoms in file order. Blank lines are skipped.
func readDomains(fpath string) *inDomains {
	domains := &inDomains{
		in:    intern.NewInterner(),
		ids:   make([]string, 0, 2000),
		atoms: make([]intern.Atom, 0, 2000),
	}

	f := util.OpenFile(fpath)
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 0 {
			// Indexing the first field of a blank line would panic.
			continue
		}
		d := stripExt(path.Base(util.CathPath(fields[0])))
		a := domains.in.Atom(d)
		domains.ids = append(domains.ids, d)
		domains.atoms = append(domains.atoms, a)
	}
	util.Assert(scanner.Err())
	return domains
}
Exemplo n.º 9
0
// main rewrites the FASTA-formatted file named by the first argument in
// place: all sequences are read (with TrustSequences enabled), the file is
// closed and re-created, and every sequence that has at least one residue is
// written back with the writer's Columns field set to 0.
func main() {
	target := util.Arg(0)

	in := util.OpenFile(target)
	reader := fasta.NewReader(in)
	reader.TrustSequences = true
	entries, err := reader.ReadAll()
	util.Assert(err, "Could not read fasta format '%s'", target)
	util.Assert(in.Close())

	// The input is fully read and closed above, so the same path can now be
	// re-created for output.
	out := util.CreateFile(target)
	writer := fasta.NewWriter(out)
	writer.Columns = 0
	for _, s := range entries {
		if len(s.Residues) == 0 {
			continue
		}
		util.Assert(writer.Write(s))
	}
	util.Assert(writer.Flush())
	util.Assert(out.Close())
}
Exemplo n.º 10
0
// readMatrix reads a space-separated square matrix from the file at fpath
// into a table keyed by the atoms in domains.
//
// Line i of the file is row i and the j-th space-separated field of a line
// is column j. Only cells strictly above the diagonal (j > i) are stored,
// under the pair (domains.atoms[i], domains.atoms[j]). Empty fields still
// advance the column counter but are not stored.
//
// It panics if a stored cell cannot be parsed as a float.
func readMatrix(domains *inDomains, fpath string) *intern.Table {
	f := util.OpenFile(fpath)
	defer f.Close()

	tab := intern.NewTableInterner(domains.in)
	scanner := bufio.NewScanner(f)
	for i := 0; scanner.Scan(); i++ {
		// The line is walked by hand rather than split, to avoid allocating
		// a slice of fields per row. It is kept as a string so that each
		// field can be handed to strconv.ParseFloat without a conversion.
		line := scanner.Text()
		bstart, j := 0, -1
		for bend := 0; bend < len(line); bend++ {
			var sval string
			switch b := line[bend]; {
			case b == ' ' || b == '\n':
				// Separator: the field ends just before it.
				sval = line[bstart:bend]
			case bend+1 == len(line):
				// Last byte of a line that has no trailing separator: the
				// field runs through the end of the line. Slicing up to
				// bend here would drop the final character of the value.
				sval = line[bstart:]
			default:
				continue
			}
			bstart = bend + 1
			j++

			if j > i && len(sval) > 0 { // upper triangular
				fval, err := strconv.ParseFloat(sval, 64)
				if err != nil {
					panic(err)
				}
				tab.Set(domains.atoms[i], domains.atoms[j], fval)
			}
		}
	}
	util.Assert(scanner.Err())
	return tab
}
Exemplo n.º 11
0
// readAlignmentDists reads every alignment file found recursively under dir
// and collects the pairwise distances they contain into a single table.
//
// Files whose base name starts with "." are skipped. Files are parsed
// concurrently by util.FlagCpu workers (at least one); a single collector
// goroutine is the only writer to the table.
func readAlignmentDists(dir string) *intern.Table {
	dists := intern.NewTable(11000)
	threads := util.FlagCpu
	if threads < 1 {
		// With no workers, the first send on alignFile would block forever.
		threads = 1
	}
	addDists := make(chan []pair)
	alignFile := make(chan string)
	done := make(chan struct{})

	// Collector: serializes all writes to dists.
	go func() {
		defer close(done)
		for fileDists := range addDists {
			for _, p := range fileDists {
				a1, a2 := dists.Atom(p.key[0]), dists.Atom(p.key[1])
				dists.Set(a1, a2, p.dist)
			}
		}
	}()

	wg := new(sync.WaitGroup)
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for fpath := range alignFile {
				log.Printf("Reading %s (%s)", fpath, time.Now())
				addDists <- readAlignmentFilePairs(fpath)
			}
		}()
	}

	for _, fpath := range util.RecursiveFiles(dir) {
		if strings.HasPrefix(path.Base(fpath), ".") {
			continue
		}
		alignFile <- fpath
	}
	close(alignFile)
	// All workers must finish sending before addDists may be closed.
	wg.Wait()
	close(addDists)
	<-done
	return dists
}

// readAlignmentFilePairs parses one tab-separated alignment file and returns
// a pair for every record that has exactly nine fields; other records are
// ignored. The file is closed before returning, so a worker never holds more
// than one file open at a time.
func readAlignmentFilePairs(fpath string) []pair {
	f := util.OpenFile(fpath)
	defer f.Close()

	csvr := csv.NewReader(f)
	csvr.Comma = '\t'
	csvr.TrimLeadingSpace = true
	csvr.FieldsPerRecord = -1 // data is poorly formatted

	records, err := csvr.ReadAll()
	util.Assert(err, "[%s]", fpath)

	fileDists := make([]pair, 0, len(records))
	for _, record := range records {
		if len(record) != 9 {
			continue
		}
		fileDists = append(fileDists, recordToDist(record))
	}
	return fileDists
}