Exemplo n.º 1
0
// main converts a PDBx/mmCIF file (optionally gzipped, detected by the
// ".gz" extension) into FASTA. Output goes to stdout, to a single file
// given as the second argument, or — with --split — to one file per chain.
func main() {
	var f io.Reader
	var err error

	f = util.OpenFile(flag.Arg(0))
	// Transparently decompress gzipped input based on the file extension.
	if strings.HasSuffix(flag.Arg(0), ".gz") {
		f, err = gzip.NewReader(f)
		util.Assert(err)
	}
	cifEntry, err := pdbx.Read(f)
	util.Assert(err, "Could not read PDBx/mmCIF file")

	// Collect one FASTA entry per usable chain with a non-empty sequence.
	fasEntries := make([]seq.Sequence, 0, 5)
	for _, ent := range cifEntry.Entities {
		for _, chain := range ent.Chains {
			if !isChainUsable(chain) || len(ent.Seq) == 0 {
				continue
			}

			fasEntries = append(fasEntries, seq.Sequence{
				Name:     chainHeader(chain),
				Residues: ent.Seq,
			})
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	// Pick the output destination: stdout when only the input path was
	// given, otherwise the file named by the second argument.
	var fasOut io.Writer
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		fasOut = util.CreateFile(util.Arg(1))
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", fasOut)
	} else {
		// One FASTA file per chain in the directory named by --split.
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
			// BUG FIX: close each per-chain file; previously every handle
			// leaked for the remainder of the process.
			util.Assert(out.Close(), "Could not close '%s'", fp)
		}
	}
}
Exemplo n.º 2
0
// TranslateQuerySeqs reads FASTA sequences from query, translates each one
// into its 6 reading frames, reduces each frame's alphabet with Reduce, and
// returns the whole result as a new in-memory FASTA reader.
func TranslateQuerySeqs(
	query *bytes.Reader, action SearchOperator) (*bytes.Reader, error) {

	buf := new(bytes.Buffer)
	f := fasta.NewWriter(buf)
	reader := fasta.NewReader(query)
	for {
		sequence, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		origSeq := sequence.Bytes()
		n := sequence.Name
		// generate 6 ORFs
		transSeqs := Translate(origSeq)
		for _, s := range transSeqs {
			result := seq.NewSequenceString(n, string(Reduce(s)))
			// BUG FIX: Write errors were previously dropped on the floor.
			if err := f.Write(result); err != nil {
				return nil, err
			}
		}
	}

	// BUG FIX: the fasta writer buffers its output; without this Flush the
	// returned reader could be missing trailing (or all) data. The sibling
	// ReduceQuerySeqs flushes here as well.
	if err := f.Flush(); err != nil {
		return nil, err
	}

	return bytes.NewReader(buf.Bytes()), nil
}
Exemplo n.º 3
0
Arquivo: msa.go Projeto: ndaniels/io-1
// write renders each row of msa through formatter and emits the results to
// writer as FASTA with no trailing asterisk and no column wrapping.
func write(writer io.Writer, msa seq.MSA, formatter formatSeq) error {
	w := fasta.NewWriter(writer)
	w.Asterisk = false // do not terminate sequences with '*'
	w.Columns = 0      // 0 disables line wrapping of sequence data
	// NOTE(review): `row` is the range key of msa.Entries, so formatter
	// evidently accepts a row index/key rather than the entry value itself —
	// confirm against the formatSeq definition before changing this loop.
	for row := range msa.Entries {
		if err := w.Write(formatter(row)); err != nil {
			return err
		}
	}
	// Flush the writer's internal buffer so all rows reach `writer`.
	return w.Flush()
}
Exemplo n.º 4
0
// main decompresses every sequence in a cablastp database (first argument)
// and writes them as FASTA to the file named by the second argument.
func main() {
	if flag.NArg() < 2 {
		// NOTE(review): flag.Usage does not exit by default; presumably this
		// package installs a Usage func that calls os.Exit — confirm,
		// otherwise execution falls through with missing arguments.
		flag.Usage()
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		cablastp.Verbose = true
	}

	// Open the fasta file specified for writing.
	outFasta, err := os.Create(flag.Arg(1))
	if err != nil {
		fatalf("Could not write to '%s': %s\n", flag.Arg(1), err)
	}
	fastaWriter := fasta.NewWriter(outFasta)
	fastaWriter.Asterisk = true

	// Open the existing database for reading.
	db, err := cablastp.NewReadDB(flag.Arg(0))
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
	}
	cablastp.Vprintln("")

	// Start the CPU profile after all of the data has been read.
	if len(flagCpuProfile) > 0 {
		f, err := os.Create(flagCpuProfile)
		if err != nil {
			fatalf("%s\n", err)
		}
		pprof.StartCPUProfile(f)
		// BUG FIX: without StopCPUProfile the profile is never flushed to
		// disk, so the profile file stayed empty.
		defer pprof.StopCPUProfile()
	}

	// Decompress each original sequence and append it to the FASTA output.
	numSeqs := db.ComDB.NumSequences()
	for orgSeqId := 0; orgSeqId < numSeqs; orgSeqId++ {
		oseq, err := db.ComDB.ReadSeq(db.CoarseDB, orgSeqId)
		if err != nil {
			fatalf("Error reading seq id '%d': %s\n", orgSeqId, err)
		}
		// A write failure for one sequence is reported but non-fatal.
		if err := fastaWriter.Write(oseq.FastaSeq()); err != nil {
			cablastp.Vprintf("Error writing seq '%s': %s\n", oseq.Name, err)
		}
	}

	cleanup(db)
	if err = fastaWriter.Flush(); err != nil {
		fatalf("%s\n", err)
	}
	if err = outFasta.Close(); err != nil {
		fatalf("%s\n", err)
	}
}
Exemplo n.º 5
0
// queryReader serializes queries into an in-memory FASTA stream and returns
// a reader over it. It returns nil when queries is empty.
func queryReader(queries []seq.Sequence) io.Reader {
	if len(queries) == 0 {
		return nil
	}

	var b bytes.Buffer
	if err := fasta.NewWriter(&b).WriteAll(queries); err != nil {
		// Writing into an in-memory buffer should never fail unless the
		// underlying byte buffer becomes too big to grow any more, so any
		// error here is treated as a programmer bug.
		panic(err)
	}
	return &b
}
Exemplo n.º 6
0
// main rewrites an A3M/FASTA file in place: entries with empty sequences
// are dropped and column wrapping is removed.
func main() {
	a3mPath := util.Arg(0)
	fa3m := util.OpenFile(a3mPath)

	// Slurp every entry before touching the file for writing.
	freader := fasta.NewReader(fa3m)
	freader.TrustSequences = true
	seqs, err := freader.ReadAll()
	util.Assert(err, "Could not read fasta format '%s'", a3mPath)
	util.Assert(fa3m.Close())

	// Reopen the same path for writing and emit the surviving entries.
	out := util.CreateFile(a3mPath)
	fwriter := fasta.NewWriter(out)
	fwriter.Columns = 0 // disable line wrapping
	for _, s := range seqs {
		if len(s.Residues) == 0 {
			continue
		}
		util.Assert(fwriter.Write(s))
	}
	util.Assert(fwriter.Flush())
	util.Assert(out.Close())
}
Exemplo n.º 7
0
// main splits a multi-entry FASTA file (first argument) into one file per
// sequence inside the directory given by the second argument. Each output
// file is named after the first whitespace-delimited token of the entry's
// header.
func main() {
	rfasta := util.OpenFasta(util.Arg(0))
	dir := util.Arg(1)
	util.Assert(os.MkdirAll(dir, 0777))

	fr := fasta.NewReader(rfasta)
	for {
		s, err := fr.Read()
		if err == io.EOF {
			break
		}
		util.Assert(err)

		// Use only the first token of the header as the file name.
		s.Name = strings.Fields(s.Name)[0]
		fw := util.CreateFile(path.Join(dir, s.Name+".fasta"))
		w := fasta.NewWriter(fw)
		util.Assert(w.Write(s))
		util.Assert(w.Flush())
		util.Assert(fw.Close())
	}
}
Exemplo n.º 8
0
// ReduceQuerySeqs reads FASTA sequences from query, reduces each sequence's
// alphabet with Reduce, and returns the results as a new in-memory
// FASTA reader.
func ReduceQuerySeqs(
	query *bytes.Reader) (*bytes.Reader, error) {
	buf := new(bytes.Buffer)
	f := fasta.NewWriter(buf)
	reader := fasta.NewReader(query)
	for {
		sequence, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		rs := Reduce(sequence.Bytes())
		n := sequence.Name

		result := seq.NewSequenceString(n, string(rs))
		// BUG FIX: Write errors were previously ignored even though this
		// function returns an error.
		if err := f.Write(result); err != nil {
			return nil, err
		}
	}
	// BUG FIX: propagate Flush errors instead of discarding them.
	if err := f.Flush(); err != nil {
		return nil, err
	}

	return bytes.NewReader(buf.Bytes()), nil
}
Exemplo n.º 9
0
// writeSecondary emits the secondary-structure annotation tracks of hhm —
// DSSP secondary structure, DSSP solvent accessibility, predicted secondary
// structure, prediction confidence, and consensus, whichever are present —
// to buf as FASTA with no trailing asterisks and no column wrapping.
func writeSecondary(buf *bufio.Writer, hhm *HHM) error {
	ss := hhm.Secondary
	// Gather the optional tracks in a fixed order, skipping absent ones.
	towrite := make([]seq.Sequence, 0, 5)
	for _, track := range []*seq.Sequence{
		ss.SSdssp, ss.SAdssp, ss.SSpred, ss.SSconf, ss.Consensus,
	} {
		if track != nil {
			towrite = append(towrite, *track)
		}
	}
	w := fasta.NewWriter(buf)
	w.Asterisk = false
	w.Columns = 0
	return w.WriteAll(towrite)
}
Exemplo n.º 10
0
// main converts a PDB file to FASTA. With --separate-chains each usable
// chain becomes its own FASTA entry; otherwise the residues of all usable
// chains are concatenated into one entry. Output goes to stdout, to a
// single file given as the second argument, or — with --split — to one
// file per entry.
func main() {
	pdbEntry := util.PDBRead(flag.Arg(0))

	fasEntries := make([]seq.Sequence, 0, 5)
	if !flagSeparateChains {
		var fasEntry seq.Sequence
		if len(pdbEntry.Chains) == 1 {
			fasEntry.Name = chainHeader(pdbEntry.OneChain())
		} else {
			// Was fmt.Sprintf("%s", ...): redundant formatting removed.
			fasEntry.Name = strings.ToLower(pdbEntry.IdCode)
		}

		// Concatenate the residues of every usable chain.
		// (Renamed from `seq`, which shadowed the imported seq package.)
		residues := make([]seq.Residue, 0, 100)
		for _, chain := range pdbEntry.Chains {
			if isChainUsable(chain) {
				residues = append(residues, chain.Sequence...)
			}
		}
		fasEntry.Residues = residues

		if len(fasEntry.Residues) == 0 {
			util.Fatalf("Could not find any amino acids.")
		}
		fasEntries = append(fasEntries, fasEntry)
	} else {
		for _, chain := range pdbEntry.Chains {
			if !isChainUsable(chain) {
				continue
			}

			fasEntries = append(fasEntries, seq.Sequence{
				Name:     chainHeader(chain),
				Residues: chain.Sequence,
			})
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	// Pick the output destination: stdout when only the input path was
	// given, otherwise the file named by the second argument.
	var fasOut io.Writer
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		fasOut = util.CreateFile(util.Arg(1))
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", fasOut)
	} else {
		// One FASTA file per entry in the directory named by --split.
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)
			// BUG FIX: close each per-entry file instead of leaking the
			// handle for the remainder of the process.
			util.Assert(out.Close(), "Could not close '%s'", fp)
		}
	}
}
Exemplo n.º 11
0
// processCompressedQueries runs a "compressed query" search: the input
// queries are first compressed into their own temporary cablastp database;
// then, for each coarse query sequence, the sequence is translated into 6
// reduced reading frames, BLASTed against the coarse target database, the
// hits and the query are both expanded, and a fine BLAST is run against a
// temporary fine database built from the expanded hits. The temporary query
// database directory is removed before returning.
//
// NOTE(review): handleFatalError presumably terminates the process on a
// non-nil error — confirm; otherwise execution continues after failures.
func processCompressedQueries(db *cablastp.DB, queryDBConf *cablastp.DBConf, inputQueryFilename string, searchBuf *bytes.Buffer) error {
	cablastp.Vprintln("Compressing queries into a database...")
	dbDirLoc := "./tmp_query_database" // TODO this should be a parameter
	qDBDirLoc, err := compressQueries(inputQueryFilename, queryDBConf, dbDirLoc)
	handleFatalError("Error compressing queries", err)
	cablastp.Vprintln("Opening DB for reading")
	qDB, err := cablastp.NewReadDB(qDBDirLoc)
	handleFatalError("Error opening query database", err)
	cablastp.Vprintln("Opening compressed queries for search...")
	compQueryFilename := qDB.CoarseFastaLocation()
	compQueries, err := getInputFasta(compQueryFilename)
	handleFatalError("Error opening compressed query file", err)

	// queryBuf accumulates the translated/reduced frames for ONE coarse
	// query at a time; it is reset at the end of each loop iteration.
	queryBuf := new(bytes.Buffer)
	f := fasta.NewWriter(queryBuf)
	reader := fasta.NewReader(compQueries)

	// origSeqID tracks the coarse sequence index, used later to expand
	// the coarse query back into its original sequences.
	for origSeqID := 0; true; origSeqID++ {

		sequence, err := reader.Read()
		if err == io.EOF {
			break
		}

		if err != nil {
			fatalf("Could not read input fasta query: %s\n", err)
		}

		origSeq := sequence.Bytes()
		n := sequence.Name
		// generate 6 ORFs
		transSeqs := cablastp.Translate(origSeq)
		for _, s := range transSeqs {
			// reduce each one
			result := seq.NewSequenceString(n, string(cablastp.Reduce(s)))

			// NOTE(review): Write/Flush errors are ignored in this loop.
			f.Write(result)

		}

		f.Flush()
		transCoarseQueries := bytes.NewReader(queryBuf.Bytes())

		cablastp.Vprintln("\nBlasting query on coarse database...")
		err = blastCoarse(db, transCoarseQueries, searchBuf)
		handleFatalError("Error blasting coarse database", err)

		cablastp.Vprintln("Decompressing coarse blast hits...")
		expandedSequences, err := expandBlastHits(db, searchBuf)
		handleFatalError("Error decompressing coarse blast hits", err)
		if len(expandedSequences) == 0 {
			cablastp.Vprintln("No results from coarse search")
		} else {
			// Re-use searchBuf as the FASTA input for the fine database.
			cablastp.Vprintln("Making FASTA from coarse blast hits...")
			searchBuf.Reset()
			err = writeFasta(expandedSequences, searchBuf)
			handleFatalError("Could not create FASTA input from coarse hits", err)

			cablastp.Vprintln("Expanding coarse query...")
			expQuery, err := expandCoarseSequence(qDB, origSeqID, &sequence)
			handleFatalError("Could not expand coarse queries", err)

			// Serialize the expanded (fine) queries into their own buffer.
			fineQueryBuf := new(bytes.Buffer)
			fineWriter := fasta.NewWriter(fineQueryBuf)
			for _, fineQuery := range expQuery {
				fineQueryBytes := fineQuery.FastaSeq().Bytes() // <- Is This the same as fineQuery.Residues()?
				fineName := fineQuery.Name
				writeSeq := seq.NewSequenceString(fineName, string(fineQueryBytes))
				fineWriter.Write(writeSeq)
			}
			fineWriter.Flush()
			transFineQueries := bytes.NewReader(fineQueryBuf.Bytes())

			cablastp.Vprintln("Building fine BLAST target database...")
			targetTmpDir, err := makeFineBlastDB(db, searchBuf)
			handleFatalError("Could not create fine database to search on", err)

			cablastp.Vprintln("Blasting original query on fine database...")
			err = blastFine(db, targetTmpDir, transFineQueries)
			handleFatalError("Error blasting fine database", err)
			// The fine database is rebuilt per query, so remove it now.
			err = os.RemoveAll(targetTmpDir)
			handleFatalError("Could not remove fine database", err)
		}
		queryBuf.Reset()
	}
	cablastp.Vprintln("Cleaning up...")
	// Remove the temporary compressed-query database.
	err = os.RemoveAll(dbDirLoc)
	handleFatalError("Could not remove query database", err)
	return nil
}
Exemplo n.º 12
0
// main is the entry point for the search tool: it opens the target
// cablastp database (first argument) and runs the queries from the FASTA
// file (second argument) against it, either in compressed-query mode or
// by translating queries directly (optionally in chunks).
func main() {

	searchBuf := new(bytes.Buffer) // might need more than 1 buffer

	if flag.NArg() != 2 {
		// NOTE(review): flag.Usage does not exit by default; presumably a
		// custom Usage func calls os.Exit — confirm, otherwise execution
		// continues with missing arguments.
		flag.Usage()
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		cablastp.Verbose = true
	}

	queryDBConf := argDBConf.DeepCopy() // deep copy of the default DBConf
	// updated by the args
	inputFastaQueryName := flag.Arg(1)
	db, err := cablastp.NewReadDB(flag.Arg(0))
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
	}
	// For query-compression mode, we first run compression on the query file
	// then coarse-coarse search, decompress both, fine-fine search.
	// otherwise, just coarse search, decompress results, fine search.
	// iterate over the query sequences in the input fasta
	// initially, only implement standard search.

	if flagCompressQuery {

		// NOTE(review): the returned error is discarded here.
		processCompressedQueries(db, queryDBConf, inputFastaQueryName, searchBuf)

	} else {

		queryBuf := new(bytes.Buffer) // might need more than 1 buffer
		inputFastaQuery, err := getInputFasta(inputFastaQueryName)
		handleFatalError("Could not read input fasta query", err)

		f := fasta.NewWriter(queryBuf)
		reader := fasta.NewReader(inputFastaQuery)

		// NOTE(review): this loop has no break statement; presumably
		// translateQueries terminates the process (or exits) when the
		// reader hits EOF — confirm, because otherwise the code after
		// the loop (the !flagIterativeQuery batch below) is unreachable.
		for i := 0; true; i++ {

			if flagIterativeQuery {

				// Translate up to flagQueryChunkSize queries, then search
				// that chunk as one batch.
				for j := 0; j < flagQueryChunkSize; j++ {
					translateQueries(reader, f)
				}

				transQueries := bytes.NewReader(queryBuf.Bytes())
				processQueries(db, transQueries, searchBuf)
				queryBuf.Reset()

			} else {
				translateQueries(reader, f)
			}

		}

		if !flagIterativeQuery {
			cablastp.Vprintln("\nProcessing Queries in one batch...")
			f.Flush()
			transQueries := bytes.NewReader(queryBuf.Bytes())
			processQueries(db, transQueries, searchBuf)
		}
	}

	cleanup(db)
}