func main() {
    var f io.Reader
    var err error

    // Open the input file, transparently decompressing gzipped input.
    f = util.OpenFile(flag.Arg(0))
    if strings.HasSuffix(flag.Arg(0), ".gz") {
        f, err = gzip.NewReader(f)
        util.Assert(err)
    }

    cifEntry, err := pdbx.Read(f)
    util.Assert(err, "Could not read PDBx/mmCIF file")

    // Collect one FASTA entry per usable chain.
    fasEntries := make([]seq.Sequence, 0, 5)
    for _, ent := range cifEntry.Entities {
        for _, chain := range ent.Chains {
            if !isChainUsable(chain) || len(ent.Seq) == 0 {
                continue
            }
            fasEntry := seq.Sequence{
                Name:     chainHeader(chain),
                Residues: ent.Seq,
            }
            fasEntries = append(fasEntries, fasEntry)
        }
    }
    if len(fasEntries) == 0 {
        util.Fatalf("Could not find any chains with amino acids.")
    }

    // Write to stdout when no output file is given.
    var fasOut io.Writer
    outName := "stdout"
    if flag.NArg() == 1 {
        fasOut = os.Stdout
    } else {
        if len(flagSplit) > 0 {
            util.Fatalf("The '--split' option is incompatible with a single " +
                "output file.")
        }
        outName = util.Arg(1)
        fasOut = util.CreateFile(outName)
    }
    if len(flagSplit) == 0 {
        util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
            "Could not write FASTA file '%s'", outName)
    } else {
        // --split: one FASTA file per chain, named after the chain header.
        for _, entry := range fasEntries {
            fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
            out := util.CreateFile(fp)

            w := fasta.NewWriter(out)
            util.Assert(w.Write(entry), "Could not write to '%s'", fp)
            util.Assert(w.Flush(), "Could not write to '%s'", fp)
        }
    }
}
func TranslateQuerySeqs(
    query *bytes.Reader, action SearchOperator) (*bytes.Reader, error) {

    buf := new(bytes.Buffer)
    f := fasta.NewWriter(buf)
    reader := fasta.NewReader(query)
    for {
        sequence, err := reader.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, err
        }

        origSeq := sequence.Bytes()
        n := sequence.Name
        // generate 6 ORFs
        transSeqs := Translate(origSeq)
        for _, s := range transSeqs {
            result := seq.NewSequenceString(n, string(Reduce(s)))
            if err := f.Write(result); err != nil {
                return nil, err
            }
        }
    }
    // Flush the fasta writer, or the tail of the output never reaches buf
    // (compare the explicit Flush in ReduceQuerySeqs below).
    if err := f.Flush(); err != nil {
        return nil, err
    }
    return bytes.NewReader(buf.Bytes()), nil
}
func write(writer io.Writer, msa seq.MSA, formatter formatSeq) error {
    w := fasta.NewWriter(writer)
    w.Asterisk = false
    w.Columns = 0
    for row := range msa.Entries {
        if err := w.Write(formatter(row)); err != nil {
            return err
        }
    }
    return w.Flush()
}
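// Sketch, not from the original sources: the formatSeq type is not shown
// here. From its use in write above, it is presumably something like
// func(row int) seq.Sequence. Under that assumption, a minimal formatter
// that emits each alignment row unchanged could look like:
func rawRowFormatter(msa seq.MSA) formatSeq {
    return func(row int) seq.Sequence {
        // Return the row as stored; a real formatter might translate or
        // strip gap characters first.
        return msa.Entries[row]
    }
}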
func main() {
    if flag.NArg() < 2 {
        flag.Usage()
    }

    // If the quiet flag isn't set, enable verbose output.
    if !flagQuiet {
        cablastp.Verbose = true
    }

    // Open the fasta file specified for writing.
    outFasta, err := os.Create(flag.Arg(1))
    if err != nil {
        fatalf("Could not write to '%s': %s\n", flag.Arg(1), err)
    }
    fastaWriter := fasta.NewWriter(outFasta)
    fastaWriter.Asterisk = true

    // Open the database for reading; this loads the coarse database into
    // memory.
    db, err := cablastp.NewReadDB(flag.Arg(0))
    if err != nil {
        fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
    }
    cablastp.Vprintln("")

    // Start the CPU profile after the database has been loaded.
    if len(flagCpuProfile) > 0 {
        f, err := os.Create(flagCpuProfile)
        if err != nil {
            fatalf("%s\n", err)
        }
        pprof.StartCPUProfile(f)
    }

    // Decompress every original sequence and write it out as FASTA.
    numSeqs := db.ComDB.NumSequences()
    for orgSeqId := 0; orgSeqId < numSeqs; orgSeqId++ {
        oseq, err := db.ComDB.ReadSeq(db.CoarseDB, orgSeqId)
        if err != nil {
            fatalf("Error reading seq id '%d': %s\n", orgSeqId, err)
        }
        if err := fastaWriter.Write(oseq.FastaSeq()); err != nil {
            cablastp.Vprintf("Error writing seq '%s': %s\n", oseq.Name, err)
        }
    }

    cleanup(db)
    if err = fastaWriter.Flush(); err != nil {
        fatalf("%s\n", err)
    }
    if err = outFasta.Close(); err != nil {
        fatalf("%s\n", err)
    }
}
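// Sketch, not from the original sources: fatalf is not shown here. From its
// call sites it presumably formats to stderr and exits non-zero, e.g.:
func fatalf(format string, v ...interface{}) {
    fmt.Fprintf(os.Stderr, format, v...)
    os.Exit(1)
}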
func queryReader(queries []seq.Sequence) io.Reader {
    if len(queries) == 0 {
        return nil
    }
    buf := new(bytes.Buffer)
    w := fasta.NewWriter(buf)
    if err := w.WriteAll(queries); err != nil {
        // I don't think this is possible unless the underlying byte buffer
        // becomes too big for it to grow any more.
        panic(err)
    }
    return buf
}
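// Sketch, not from the original sources: round-tripping queryReader's output
// through the fasta reader, using only APIs that appear elsewhere in this
// file. Assumes seq.Residue has a byte-sized underlying type, so the string
// conversion below is legal.
func exampleQueryReader() {
    queries := []seq.Sequence{
        {Name: "query1", Residues: []seq.Residue("MKAILV")},
    }
    r := queryReader(queries)
    if r == nil {
        return // queryReader returns nil for an empty query set
    }
    seqs, err := fasta.NewReader(r).ReadAll()
    if err != nil {
        panic(err)
    }
    fmt.Printf("read back %d sequence(s)\n", len(seqs))
}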
func main() {
    a3mPath := util.Arg(0)

    fa3m := util.OpenFile(a3mPath)
    freader := fasta.NewReader(fa3m)
    freader.TrustSequences = true
    seqs, err := freader.ReadAll()
    util.Assert(err, "Could not read fasta format '%s'", a3mPath)
    util.Assert(fa3m.Close())

    // Rewrite the file in place, dropping empty sequences and removing
    // column wrapping. (s, not seq, so the seq package isn't shadowed.)
    w := util.CreateFile(a3mPath)
    fwriter := fasta.NewWriter(w)
    fwriter.Columns = 0
    for _, s := range seqs {
        if len(s.Residues) > 0 {
            util.Assert(fwriter.Write(s))
        }
    }
    util.Assert(fwriter.Flush())
    util.Assert(w.Close())
}
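// Sketch, not from the original sources: util.CreateFile presumably
// truncates, so the in-place rewrite above loses the original file if the
// program dies before Flush. A safer variant writes to a temporary file in
// the same directory and renames it over the original:
func rewriteAtomically(a3mPath string, writeTo func(io.Writer) error) error {
    tmp, err := os.CreateTemp(filepath.Dir(a3mPath), ".a3m-rewrite-*")
    if err != nil {
        return err
    }
    defer os.Remove(tmp.Name()) // no-op once the rename succeeds
    if err := writeTo(tmp); err != nil {
        tmp.Close()
        return err
    }
    if err := tmp.Close(); err != nil {
        return err
    }
    return os.Rename(tmp.Name(), a3mPath)
}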
func main() {
    rfasta := util.OpenFasta(util.Arg(0))
    dir := util.Arg(1)
    util.Assert(os.MkdirAll(dir, 0777))

    fr := fasta.NewReader(rfasta)
    for {
        s, err := fr.Read()
        if err == io.EOF {
            break
        }
        util.Assert(err)

        // Use only the first whitespace-delimited field of the header as
        // the file name.
        s.Name = strings.Fields(s.Name)[0]

        fw := util.CreateFile(path.Join(dir, s.Name+".fasta"))
        w := fasta.NewWriter(fw)
        util.Assert(w.Write(s))
        util.Assert(w.Flush())
        util.Assert(fw.Close())
    }
}
func ReduceQuerySeqs(
    query *bytes.Reader) (*bytes.Reader, error) {

    buf := new(bytes.Buffer)
    f := fasta.NewWriter(buf)
    reader := fasta.NewReader(query)
    for {
        sequence, err := reader.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, err
        }

        rs := Reduce(sequence.Bytes())
        n := sequence.Name
        result := seq.NewSequenceString(n, string(rs))
        if err := f.Write(result); err != nil {
            return nil, err
        }
    }
    if err := f.Flush(); err != nil {
        return nil, err
    }
    return bytes.NewReader(buf.Bytes()), nil
}
func writeSecondary(buf *bufio.Writer, hhm *HHM) error {
    ss := hhm.Secondary

    towrite := make([]seq.Sequence, 0, 5)
    if ss.SSdssp != nil {
        towrite = append(towrite, *ss.SSdssp)
    }
    if ss.SAdssp != nil {
        towrite = append(towrite, *ss.SAdssp)
    }
    if ss.SSpred != nil {
        towrite = append(towrite, *ss.SSpred)
    }
    if ss.SSconf != nil {
        towrite = append(towrite, *ss.SSconf)
    }
    if ss.Consensus != nil {
        towrite = append(towrite, *ss.Consensus)
    }

    w := fasta.NewWriter(buf)
    w.Asterisk = false
    w.Columns = 0
    return w.WriteAll(towrite)
}
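// Note on writeSecondary (an assumption about the fasta package, consistent
// with the other functions in this file): Write buffers until Flush, while
// WriteAll flushes on its own — hence no explicit Flush above. Callers must
// still flush the *bufio.Writer they pass in; the fasta writer cannot flush
// its caller's buffer.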
func main() {
    pdbEntry := util.PDBRead(flag.Arg(0))

    fasEntries := make([]seq.Sequence, 0, 5)
    if !flagSeparateChains {
        // Emit a single entry concatenating every usable chain.
        var fasEntry seq.Sequence
        if len(pdbEntry.Chains) == 1 {
            fasEntry.Name = chainHeader(pdbEntry.OneChain())
        } else {
            fasEntry.Name = strings.ToLower(pdbEntry.IdCode)
        }

        residues := make([]seq.Residue, 0, 100)
        for _, chain := range pdbEntry.Chains {
            if isChainUsable(chain) {
                residues = append(residues, chain.Sequence...)
            }
        }
        fasEntry.Residues = residues
        if len(fasEntry.Residues) == 0 {
            util.Fatalf("Could not find any amino acids.")
        }
        fasEntries = append(fasEntries, fasEntry)
    } else {
        // Emit one entry per usable chain.
        for _, chain := range pdbEntry.Chains {
            if !isChainUsable(chain) {
                continue
            }
            fasEntry := seq.Sequence{
                Name:     chainHeader(chain),
                Residues: chain.Sequence,
            }
            fasEntries = append(fasEntries, fasEntry)
        }
    }
    if len(fasEntries) == 0 {
        util.Fatalf("Could not find any chains with amino acids.")
    }

    var fasOut io.Writer
    outName := "stdout"
    if flag.NArg() == 1 {
        fasOut = os.Stdout
    } else {
        if len(flagSplit) > 0 {
            util.Fatalf("The '--split' option is incompatible with a single " +
                "output file.")
        }
        outName = util.Arg(1)
        fasOut = util.CreateFile(outName)
    }
    if len(flagSplit) == 0 {
        util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
            "Could not write FASTA file '%s'", outName)
    } else {
        for _, entry := range fasEntries {
            fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
            out := util.CreateFile(fp)

            w := fasta.NewWriter(out)
            util.Assert(w.Write(entry), "Could not write to '%s'", fp)
            util.Assert(w.Flush(), "Could not write to '%s'", fp)
        }
    }
}
func processCompressedQueries(db *cablastp.DB, queryDBConf *cablastp.DBConf,
    inputQueryFilename string, searchBuf *bytes.Buffer) error {

    cablastp.Vprintln("Compressing queries into a database...")
    dbDirLoc := "./tmp_query_database" // TODO: this should be a parameter
    qDBDirLoc, err := compressQueries(inputQueryFilename, queryDBConf, dbDirLoc)
    handleFatalError("Error compressing queries", err)

    cablastp.Vprintln("Opening DB for reading")
    qDB, err := cablastp.NewReadDB(qDBDirLoc)
    handleFatalError("Error opening query database", err)

    cablastp.Vprintln("Opening compressed queries for search...")
    compQueryFilename := qDB.CoarseFastaLocation()
    compQueries, err := getInputFasta(compQueryFilename)
    handleFatalError("Error opening compressed query file", err)

    queryBuf := new(bytes.Buffer)
    f := fasta.NewWriter(queryBuf)
    reader := fasta.NewReader(compQueries)

    // Process one coarse query at a time: translate and reduce it, search
    // the coarse database, then expand both sides for the fine search.
    for origSeqID := 0; ; origSeqID++ {
        sequence, err := reader.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            fatalf("Could not read input fasta query: %s\n", err)
        }

        origSeq := sequence.Bytes()
        n := sequence.Name
        // generate 6 ORFs
        transSeqs := cablastp.Translate(origSeq)
        for _, s := range transSeqs {
            // reduce each one
            result := seq.NewSequenceString(n, string(cablastp.Reduce(s)))
            f.Write(result)
        }
        f.Flush()
        transCoarseQueries := bytes.NewReader(queryBuf.Bytes())

        cablastp.Vprintln("\nBlasting query on coarse database...")
        err = blastCoarse(db, transCoarseQueries, searchBuf)
        handleFatalError("Error blasting coarse database", err)

        cablastp.Vprintln("Decompressing coarse blast hits...")
        expandedSequences, err := expandBlastHits(db, searchBuf)
        handleFatalError("Error decompressing coarse blast hits", err)

        if len(expandedSequences) == 0 {
            cablastp.Vprintln("No results from coarse search")
        } else {
            cablastp.Vprintln("Making FASTA from coarse blast hits...")
            searchBuf.Reset()
            err = writeFasta(expandedSequences, searchBuf)
            handleFatalError("Could not create FASTA input from coarse hits", err)

            cablastp.Vprintln("Expanding coarse query...")
            expQuery, err := expandCoarseSequence(qDB, origSeqID, &sequence)
            handleFatalError("Could not expand coarse queries", err)

            fineQueryBuf := new(bytes.Buffer)
            fineWriter := fasta.NewWriter(fineQueryBuf)
            for _, fineQuery := range expQuery {
                // TODO: is this the same as fineQuery.Residues()?
                fineQueryBytes := fineQuery.FastaSeq().Bytes()
                fineName := fineQuery.Name
                writeSeq := seq.NewSequenceString(fineName, string(fineQueryBytes))
                fineWriter.Write(writeSeq)
            }
            fineWriter.Flush()
            transFineQueries := bytes.NewReader(fineQueryBuf.Bytes())

            cablastp.Vprintln("Building fine BLAST target database...")
            targetTmpDir, err := makeFineBlastDB(db, searchBuf)
            handleFatalError("Could not create fine database to search on", err)

            cablastp.Vprintln("Blasting original query on fine database...")
            err = blastFine(db, targetTmpDir, transFineQueries)
            handleFatalError("Error blasting fine database", err)

            err = os.RemoveAll(targetTmpDir)
            handleFatalError("Could not remove fine database", err)
        }
        queryBuf.Reset()
    }

    cablastp.Vprintln("Cleaning up...")
    err = os.RemoveAll(dbDirLoc)
    handleFatalError("Could not remove query database", err)
    return nil
}
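// Sketch, not from the original sources: handleFatalError is not shown here.
// From its call sites in this file, it presumably aborts via fatalf only
// when an error is actually present:
func handleFatalError(msg string, err error) {
    if err != nil {
        fatalf("%s: %s\n", msg, err)
    }
}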
func main() {
    searchBuf := new(bytes.Buffer) // might need more than 1 buffer

    if flag.NArg() != 2 {
        flag.Usage()
    }

    // If the quiet flag isn't set, enable verbose output.
    if !flagQuiet {
        cablastp.Verbose = true
    }

    queryDBConf := argDBConf.DeepCopy() // deep copy of the default DBConf,
    // updated by the args

    inputFastaQueryName := flag.Arg(1)
    db, err := cablastp.NewReadDB(flag.Arg(0))
    if err != nil {
        fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
    }

    // For query-compression mode, we first run compression on the query file,
    // then coarse-coarse search, decompress both, fine-fine search.
    // Otherwise, just coarse search, decompress results, fine search.
    // Iterate over the query sequences in the input fasta.
    // Initially, only implement standard search.
    if flagCompressQuery {
        processCompressedQueries(db, queryDBConf, inputFastaQueryName, searchBuf)
    } else {
        queryBuf := new(bytes.Buffer) // might need more than 1 buffer
        inputFastaQuery, err := getInputFasta(inputFastaQueryName)
        handleFatalError("Could not read input fasta query", err)

        f := fasta.NewWriter(queryBuf)
        reader := fasta.NewReader(inputFastaQuery)

        // NOTE: nothing below breaks out of this loop explicitly;
        // translateQueries (not shown) presumably ends it when the reader
        // is exhausted.
        for {
            if flagIterativeQuery {
                // Translate and search one chunk of queries at a time.
                for j := 0; j < flagQueryChunkSize; j++ {
                    translateQueries(reader, f)
                }
                transQueries := bytes.NewReader(queryBuf.Bytes())
                processQueries(db, transQueries, searchBuf)
                queryBuf.Reset()
            } else {
                translateQueries(reader, f)
            }
        }

        if !flagIterativeQuery {
            cablastp.Vprintln("\nProcessing Queries in one batch...")
            f.Flush()
            transQueries := bytes.NewReader(queryBuf.Bytes())
            processQueries(db, transQueries, searchBuf)
        }
    }
    cleanup(db)
}
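// Sketch, not from the original sources: translateQueries is not shown here,
// and main's read loop above has no break of its own, so the real function
// must end the loop (or the program) when the reader is exhausted. The
// *fasta.Reader and *fasta.Writer parameter types are assumed from what
// fasta.NewReader and fasta.NewWriter return. Modeled on TranslateQuerySeqs
// and processCompressedQueries, one plausible shape:
func translateQueriesSketch(reader *fasta.Reader, f *fasta.Writer) error {
    sequence, err := reader.Read()
    if err != nil {
        // io.EOF included: the caller decides whether to stop or fail.
        return err
    }
    // generate 6 ORFs, reduce each one, and queue them for the coarse search
    for _, s := range cablastp.Translate(sequence.Bytes()) {
        result := seq.NewSequenceString(sequence.Name, string(cablastp.Reduce(s)))
        if err := f.Write(result); err != nil {
            return err
        }
    }
    return nil
}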