func compressQueries(queryFileName string) (string, error) { // dbDirLoc, err := ioutil.TempDir(flagTempFileDir, "temporary-compressed-query-db") dbDirLoc := flagTempFileDir dbDirLoc = dbDirLoc + "/temporary-compressed-query-db" if flagQueryDBConf != "" { qdbParams, err := os.Open(flagQueryDBConf) if err != nil { return "", fmt.Errorf("Failed to load query db conf: %s", err) } queryDBConf, err = mica.LoadDBConf(qdbParams) if err != nil { return "", fmt.Errorf("Failed to load query db conf: %s", err) } } db, err := mica.NewWriteDB(false, queryDBConf, dbDirLoc) handleFatalError("Failed to open new db", err) mica.Vprintln("Starting query compress workers...") pool := mica.StartCompressReducedWorkers(db) seqId := db.ComDB.NumSequences() mainQuit := make(chan struct{}, 0) seqChan, err := mica.ReadOriginalSeqs(queryFileName, []byte{}) handleFatalError("Could not read query sequences", err) mica.Vprintln("Reading sequences into query database...") for readSeq := range seqChan { // Do a non-blocking receive to see if main needs to quit. select { case <-mainQuit: <-mainQuit // wait for cleanup to finish before exiting main. return "", nil default: } handleFatalError("Failed to read sequence", readSeq.Err) queryDBConf.BlastDBSize += uint64(readSeq.Seq.Len()) redReadSeq := &mica.ReducedSeq{ &mica.Sequence{ Name: readSeq.Seq.Name, Residues: readSeq.Seq.Residues, Offset: readSeq.Seq.Offset, Id: readSeq.Seq.Id, }, } seqId = pool.CompressReduced(seqId, redReadSeq) } mica.Vprintln("Cleaning up query database...") mica.CleanupDB(db, &pool) mica.Vprintln("") return dbDirLoc, nil }
func main() { if flag.NArg() != 2 { flag.Usage() } // If the quiet flag isn't set, enable verbose output. if !flagQuiet { mica.Verbose = true } db, err := mica.NewReadDB(flag.Arg(0)) if err != nil { fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err) } inputFastaQueryName := flag.Arg(1) aaQueryFile, err := os.Open(inputFastaQueryName) if err != nil { fatalf("Could not open '%s' query file: %s\n", inputFastaQueryName, err) } mica.Vprintln("\nProcessing Queries...") err = processQueries(db, aaQueryFile) if err != nil { fatalf("Error processing queries: %s\n", err) } cleanup(db) }
func main() { if flag.NArg() < 2 { flag.Usage() } // If the quiet flag isn't set, enable verbose output. if !flagQuiet { mica.Verbose = true } // Open the fasta file specified for writing. outFasta, err := os.Create(flag.Arg(1)) if err != nil { fatalf("Could not write to '%s': %s\n", flag.Arg(1), err) } fastaWriter := fasta.NewWriter(outFasta) fastaWriter.Asterisk = true // Create a new database for writing. If we're appending, we load // the coarse database into memory, and setup the database for writing. db, err := mica.NewReadDB(flag.Arg(0)) if err != nil { fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err) } mica.Vprintln("") // Start the CPU profile after all of the data has been read. if len(flagCpuProfile) > 0 { f, err := os.Create(flagCpuProfile) if err != nil { fatalf("%s\n", err) } pprof.StartCPUProfile(f) } numSeqs := db.ComDB.NumSequences() for orgSeqId := 0; orgSeqId < numSeqs; orgSeqId++ { oseq, err := db.ComDB.ReadSeq(db.CoarseDB, orgSeqId) if err != nil { fatalf("Error reading seq id '%d': %s\n", orgSeqId, err) } if err := fastaWriter.Write(oseq.FastaSeq()); err != nil { mica.Vprintf("Error writing seq '%s': %s\n", oseq.Name, err) } } cleanup(db) if err = fastaWriter.Flush(); err != nil { fatalf("%s\n", err) } if err = outFasta.Close(); err != nil { fatalf("%s\n", err) } }
func main() { if flag.NArg() != 2 { flag.Usage() } // If the quiet flag isn't set, enable verbose output. if !flagQuiet { mica.Verbose = true } db, err := mica.NewReadDB(flag.Arg(0)) if err != nil { fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err) } inputFastaQueryName := flag.Arg(1) if flagCompressQuery { fatalf("Query compression is currently unsupported.\nExiting.\n", "") mica.Vprintln("\nProcessing queries with query-side compression...") err = processCompressedQueries(db, inputFastaQueryName) if err != nil { fatalf("Error processing queries with query-side compression: %s\n", err) } } else { mica.Vprintln("\nProcessing queries...") err = processQueries(db, inputFastaQueryName) if err != nil { fatalf("Error processing queries: %s\n", err) } } cleanup(db) }
// When the program ends (either by SIGTERM or when all of the input sequences // are compressed), 'cleanup' is executed. It writes all CPU/memory profiles // if they're enabled, waits for the compression workers to finish, saves // the database to disk and closes all file handles. func cleanup(db *mica.DB, pool *compressPool) { mica.Vprintln("Cleaning up and saving.") if len(flagCpuProfile) > 0 { pprof.StopCPUProfile() } if len(flagMemProfile) > 0 { writeMemProfile(fmt.Sprintf("%s.last", flagMemProfile)) } if len(flagMemStats) > 0 { writeMemStats(fmt.Sprintf("%s.last", flagMemStats)) } pool.done() if err := db.Save(); err != nil { fatalf("Could not save database: %s\n", err) } db.WriteClose() }
func main() { if flag.NArg() < 2 { flag.Usage() } // If both 'append' and 'overwrite' flags are set, quit because the // combination doesn't make sense. if flagAppend && flagOverwrite { fatalf("Both the 'append' and 'overwrite' flags are set. It does " + "not make sense to set both of these flags.") } // If the quiet flag isn't set, enable verbose output. if !flagQuiet { mica.Verbose = true } // If the overwrite flag is set, remove whatever directory that may // already be there. if flagOverwrite { if err := os.RemoveAll(flag.Arg(0)); err != nil { fatalf("Could not remove existing database '%s': %s.", flag.Arg(0), err) } } // Create a new database for writing. If we're appending, we load // the coarse database into memory, and setup the database for writing. db, err := mica.NewWriteDB(flagAppend, dbConf, flag.Arg(0)) if err != nil { fatalf("%s\n", err) } mica.Vprintln("") pool := startCompressWorkers(db) orgSeqId := db.ComDB.NumSequences() mainQuit := make(chan struct{}, 0) // If the process is killed, try to clean up elegantly. // The idea is to preserve the integrity of the database. attachSignalHandler(db, mainQuit, &pool) // Start the CPU profile after all of the data has been read. if len(flagCpuProfile) > 0 { f, err := os.Create(flagCpuProfile) if err != nil { fatalf("%s\n", err) } pprof.StartCPUProfile(f) } for _, arg := range flag.Args()[1:] { seqChan, err := mica.ReadOriginalSeqs(arg, ignoredResidues) if err != nil { log.Fatal(err) } if orgSeqId == 0 { timer = time.Now() } for readSeq := range seqChan { // Do a non-blocking receive to see if main needs to quit. select { case <-mainQuit: <-mainQuit // wait for cleanup to finish before exiting main. return default: } if readSeq.Err != nil { log.Fatal(err) } dbConf.BlastDBSize += uint64(readSeq.Seq.Len()) orgSeqId = pool.compress(orgSeqId, readSeq.Seq) verboseOutput(db, orgSeqId) if flagMaxSeedsGB > 0 && orgSeqId%10000 == 0 { db.CoarseDB.Seeds.MaybeWipe(flagMaxSeedsGB) runtime.GC() } } } mica.Vprintln("\n") mica.Vprintf("Wrote %s.\n", mica.FileCompressed) mica.Vprintf("Wrote %s.\n", mica.FileIndex) cleanup(db, &pool) }
func main() { buf := new(bytes.Buffer) if flag.NArg() < 2 { flag.Usage() } // If the quiet flag isn't set, enable verbose output. if !flagQuiet { mica.Verbose = true } inputFastaQuery, err := getInputFasta() if err != nil { fatalf("Could not read input fasta query: %s\n", err) } db, err := mica.NewReadDB(flag.Arg(0)) if err != nil { fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err) } mica.Vprintln("\nBlasting query on coarse database...") if err := blastCoarse(db, inputFastaQuery, buf); err != nil { fatalf("Error blasting coarse database: %s\n", err) } mica.Vprintln("Decompressing blast hits...") expandedSequences, err := expandBlastHits(db, buf) if err != nil { fatalf("%s\n", err) } // Clear the buffer and write the fasta file to it. buf.Reset() if err := writeFasta(expandedSequences, buf); err != nil { fatalf("Could not create FASTA input from coarse hits: %s\n", err) } // Create the fine blast db in a temporary directory mica.Vprintln("Building fine BLAST database...") tmpDir, err := makeFineBlastDB(db, buf) if err != nil { fatalf("Could not create fine database to search on: %s\n", err) } // Finally, run the query against the fine fasta database and pass on // stdout and stderr... mica.Vprintln("Blasting query on fine database...") if _, err := inputFastaQuery.Seek(0, os.SEEK_SET); err != nil { fatalf("Could not seek to start of query fasta input: %s\n", err) } if err := blastFine(db, tmpDir, inputFastaQuery); err != nil { fatalf("Error blasting fine database: %s\n", err) } // Delete the temporary fine database. if !flagNoCleanup { if err := os.RemoveAll(tmpDir); err != nil { fatalf("Could not delete fine BLAST database: %s\n", err) } } cleanup(db) }
func processQueries(db *mica.DB, nuclQueryFile *os.File) error { mica.Vprintln("\nBlasting with diamond query on coarse database...") dmndOutDaaFilename, err := dmndBlastPCoarse(db, nuclQueryFile) if err != nil { fatalf("Error blasting with diamond on coarse database: %s\n", err) } dmndOutFile, err := convertDmndToBlastTabular(dmndOutDaaFilename) if err != nil { return fmt.Errorf("Error convertign diamond output to blast tabular: %s\n") } mica.Vprintln("Decompressing diamond hits...") dmndOutArr, err := ioutil.ReadAll(dmndOutFile) if !flagNoCleanup { err := os.RemoveAll(dmndOutFile.Name()) handleFatalError("Could not delete diamond output from coarse search", err) err = os.RemoveAll(dmndOutDaaFilename) handleFatalError("Could not delete diamond output from coarse search", err) } if err != nil { return fmt.Errorf("Could not read diamond output: %s", err) } if len(dmndOutArr) == 0 { return fmt.Errorf("No coarse hits. %s", "Aborting.") } mica.Vprintln("Expanding diamond hits...") dmndOut := bytes.NewBuffer(dmndOutArr) expandedSequences, err := expandDmndHits(db, dmndOut) if err != nil { return fmt.Errorf("%s\n", err) } // Write the contents of the expanded sequences to a fasta file. // It is then indexed using makeblastdb. searchBuf := new(bytes.Buffer) if err := writeFasta(expandedSequences, searchBuf); err != nil { fatalf("Could not create FASTA input from coarse hits: %s\n", err) } if flagDmndFine != "" { mica.Vprintln("Building fine DIAMOND database...") tmpFineDB, err := makeFineDmndDB(searchBuf) handleFatalError("Could not create fine diamond database to search on", err) err = dmndBlastPFine(nuclQueryFile, flagDmndFine, tmpFineDB) handleFatalError("Error diamond-blasting (p-search) fine database", err) // Delete the temporary fine database. if !flagNoCleanup { err := os.RemoveAll(tmpFineDB) err = os.RemoveAll(tmpFineDB + ".dmnd") handleFatalError("Could not delete fine DIAMOND database", err) } } else { // Create the fine blast db in a temporary directory mica.Vprintln("Building fine BLAST database...") tmpFineDB, err := makeFineBlastDB(db, searchBuf) handleFatalError("Could not create fine blast database to search on", err) // retrieve the cluster members for the original representative query seq // pass them to blastx on the expanded (fine) db // Finally, run the query against the fine fasta database and pass on the // stdout and stderr... bs, err := ioutil.ReadAll(nuclQueryFile) if err != nil { return fmt.Errorf("Could not read input fasta query: %s", err) } nuclQueryReader := bytes.NewReader(bs) err = blastFine(db, tmpFineDB, nuclQueryReader) handleFatalError("Error blasting fine database (p-search):", err) // Delete the temporary fine database. if !flagNoCleanup { err := os.RemoveAll(tmpFineDB) handleFatalError("Could not delete fine BLAST database", err) } } return nil }
func processCompressedQueries(db *mica.DB, nuclQueryFileLoc string) error { queryDbLoc, err := compressQueries(nuclQueryFileLoc) if err != nil { return fmt.Errorf("Error compressing queries: %s\n", err) } queryDb, err := mica.NewReadDB(queryDbLoc) if err != nil { return fmt.Errorf("Error opening newly created compressed query db: %s\n", err) } mica.Vprintln("\nBlasting with diamond query on coarse database...") dmndOutDaaFilename, err := dmndBlastXCoarse(db, queryDb.CoarseDB.FileFasta.Name()) if err != nil { return fmt.Errorf("Error blasting with diamond on coarse database from compressed queries: %s\n", err) } dmndOutFile, err := convertDmndToBlastTabular(dmndOutDaaFilename) if err != nil { return fmt.Errorf("Error converting diamond output to blast tabular: %s\n") } mica.Vprintln("Decompressing diamond hits...") dmndOutArr, err := ioutil.ReadAll(dmndOutFile) // nuclQueryFile, err := os.Open(nuclQueryFileLoc) // if err != nil { // fatalf("Could not open '%s' query file for fine search: %s\n", nuclQueryFileLoc, err) // } if !flagNoCleanup { err := os.RemoveAll(dmndOutFile.Name()) handleFatalError("Could not delete diamond output from coarse search", err) err = os.RemoveAll(dmndOutDaaFilename) handleFatalError("Could not delete diamond output from coarse search", err) err = os.RemoveAll(queryDbLoc) handleFatalError("Could not delete diamond output from coarse search", err) } if err != nil { return fmt.Errorf("Could not read diamond output: %s", err) } if len(dmndOutArr) == 0 { return fmt.Errorf("No coarse hits. %s", "Aborting.") } mica.Vprintln("Expanding diamond hits (queries and targets)...") dmndOut := bytes.NewBuffer(dmndOutArr) expandedSequences, expandedQueries, err := expandDmndHitsAndQuery(db, queryDb, dmndOut) if err != nil { return fmt.Errorf("%s\n", err) } // Write the contents of the expanded sequences to a fasta file. // It is then indexed using makeblastdb. searchBuf := new(bytes.Buffer) if err := writeFasta(expandedSequences, searchBuf); err != nil { fatalf("Could not create FASTA input from coarse hits: %s\n", err) } qSearchBuf := new(bytes.Buffer) if err := writeFasta(expandedQueries, qSearchBuf); err != nil { fatalf("Could not create FASTA input from coarse QUERY hits: %s\n", err) } fineQueryFile, err := ioutil.TempFile(flagTempFileDir, "fine-query-sequences") if err != nil { fatalf("Could not create temporary QUERY sequence file: %s\n", err) } err = ioutil.WriteFile(fineQueryFile.Name(), qSearchBuf.Bytes(), 0666) if err != nil { fatalf("Could not write to temporary QUERY sequence file: %s\n", err) } if flagDmndFine != "" { mica.Vprintln("Building fine DIAMOND database...") tmpFineDB, err := makeFineDmndDB(searchBuf) handleFatalError("Could not create fine diamond database to search on", err) err = dmndBlastXFine(fineQueryFile.Name(), flagDmndFine, tmpFineDB) handleFatalError("Error diamond-blasting (x-search) fine database", err) // Delete the temporary fine database. if !flagNoCleanup { err := os.RemoveAll(tmpFineDB) err = os.RemoveAll(tmpFineDB + ".dmnd") handleFatalError("Could not delete fine DIAMOND database", err) } } else { // Create the fine blast db in a temporary directory mica.Vprintln("Building fine BLAST database...") tmpFineDB, err := makeFineBlastDB(db, searchBuf) handleFatalError("Could not create fine blast database to search on", err) // retrieve the cluster members for the original representative query seq // pass them to blastx on the expanded (fine) db // Finally, run the query against the fine fasta database and pass on the // stdout and stderr... bs, err := ioutil.ReadAll(fineQueryFile) if err != nil { return fmt.Errorf("Could not read input fasta query: %s", err) } nuclQueryReader := bytes.NewReader(bs) err = blastFine(db, tmpFineDB, nuclQueryReader) handleFatalError("Error blasting fine database (x-search):", err) // Delete the temporary fine database. if !flagNoCleanup { err := os.RemoveAll(tmpFineDB) handleFatalError("Could not delete fine BLAST database", err) } } return nil }