// compressQueries reads the query sequences from queryFileName, compresses
// them into a temporary query database on disk, and returns the directory
// containing that database.
func compressQueries(queryFileName string) (string, error) {
	// dbDirLoc, err := ioutil.TempDir(flagTempFileDir, "temporary-compressed-query-db")
	dbDirLoc := flagTempFileDir + "/temporary-compressed-query-db"

	// If a query database configuration file was given, load it and use it
	// in place of the default configuration.
	if flagQueryDBConf != "" {
		qdbParams, err := os.Open(flagQueryDBConf)
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
		queryDBConf, err = mica.LoadDBConf(qdbParams)
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
	}

	db, err := mica.NewWriteDB(false, queryDBConf, dbDirLoc)
	handleFatalError("Failed to open new db", err)

	mica.Vprintln("Starting query compress workers...")
	pool := mica.StartCompressReducedWorkers(db)
	seqId := db.ComDB.NumSequences()
	mainQuit := make(chan struct{}, 0)

	seqChan, err := mica.ReadOriginalSeqs(queryFileName, []byte{})
	handleFatalError("Could not read query sequences", err)

	mica.Vprintln("Reading sequences into query database...")
	for readSeq := range seqChan {
		// Do a non-blocking receive to see if main needs to quit.
		select {
		case <-mainQuit:
			<-mainQuit // wait for cleanup to finish before exiting main.
			return "", nil
		default:
		}

		handleFatalError("Failed to read sequence", readSeq.Err)

		queryDBConf.BlastDBSize += uint64(readSeq.Seq.Len())

		// Wrap the original sequence as a reduced sequence and hand it to
		// the compression workers.
		redReadSeq := &mica.ReducedSeq{
			&mica.Sequence{
				Name:     readSeq.Seq.Name,
				Residues: readSeq.Seq.Residues,
				Offset:   readSeq.Seq.Offset,
				Id:       readSeq.Seq.Id,
			},
		}
		seqId = pool.CompressReduced(seqId, redReadSeq)
	}

	mica.Vprintln("Cleaning up query database...")
	mica.CleanupDB(db, &pool)
	mica.Vprintln("")

	return dbDirLoc, nil
}
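// handleFatalError is called above but defined elsewhere in this package.
// Judging from its call sites, it is assumed to be a thin wrapper that aborts
// with a message when err is non-nil; a minimal sketch of that assumption
// (names and formatting are illustrative, not the real implementation):
//
//	func handleFatalError(msg string, err error) {
//		if err != nil {
//			fatalf("%s: %s\n", msg, err)
//		}
//	}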
func main() {
	if flag.NArg() < 2 {
		flag.Usage()
	}

	// If both the 'append' and 'overwrite' flags are set, quit because the
	// combination doesn't make sense.
	if flagAppend && flagOverwrite {
		fatalf("Both the 'append' and 'overwrite' flags are set. It does " +
			"not make sense to set both of these flags.")
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		mica.Verbose = true
	}

	// If the overwrite flag is set, remove whatever directory may already
	// be there.
	if flagOverwrite {
		if err := os.RemoveAll(flag.Arg(0)); err != nil {
			fatalf("Could not remove existing database '%s': %s.",
				flag.Arg(0), err)
		}
	}

	// Create a new database for writing. If we're appending, load the
	// coarse database into memory and set up the database for writing.
	db, err := mica.NewWriteDB(flagAppend, dbConf, flag.Arg(0))
	if err != nil {
		fatalf("%s\n", err)
	}
	mica.Vprintln("")

	pool := startCompressWorkers(db)
	orgSeqId := db.ComDB.NumSequences()
	mainQuit := make(chan struct{}, 0)

	// If the process is killed, try to clean up elegantly.
	// The idea is to preserve the integrity of the database.
	attachSignalHandler(db, mainQuit, &pool)

	// Start the CPU profile (if requested) before compression begins.
	if len(flagCpuProfile) > 0 {
		f, err := os.Create(flagCpuProfile)
		if err != nil {
			fatalf("%s\n", err)
		}
		pprof.StartCPUProfile(f)
	}

	// Every argument after the database location is a file of sequences to
	// compress into the database.
	for _, arg := range flag.Args()[1:] {
		seqChan, err := mica.ReadOriginalSeqs(arg, ignoredResidues)
		if err != nil {
			log.Fatal(err)
		}
		if orgSeqId == 0 {
			timer = time.Now()
		}
		for readSeq := range seqChan {
			// Do a non-blocking receive to see if main needs to quit.
			select {
			case <-mainQuit:
				<-mainQuit // wait for cleanup to finish before exiting main.
				return
			default:
			}

			if readSeq.Err != nil {
				log.Fatal(readSeq.Err)
			}

			dbConf.BlastDBSize += uint64(readSeq.Seq.Len())
			orgSeqId = pool.compress(orgSeqId, readSeq.Seq)
			verboseOutput(db, orgSeqId)

			// Periodically check whether the seeds table has grown past its
			// memory cap and wipe it if necessary.
			if flagMaxSeedsGB > 0 && orgSeqId%10000 == 0 {
				db.CoarseDB.Seeds.MaybeWipe(flagMaxSeedsGB)
				runtime.GC()
			}
		}
	}

	mica.Vprintln("\n")
	mica.Vprintf("Wrote %s.\n", mica.FileCompressed)
	mica.Vprintf("Wrote %s.\n", mica.FileIndex)
	cleanup(db, &pool)
}
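// attachSignalHandler is defined elsewhere in this package. From the way
// mainQuit is consumed above (one receive to stop the read loop, a second to
// wait for cleanup), it is assumed to behave roughly like the sketch below.
// The signature, the pool type, and the handled signals are assumptions for
// illustration only, not the real code:
//
//	func attachSignalHandler(db *mica.DB, mainQuit chan struct{}, pool *compressPool) {
//		sigChan := make(chan os.Signal, 1)
//		signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
//		go func() {
//			<-sigChan
//			mainQuit <- struct{}{} // ask the main loop to stop reading sequences
//			cleanup(db, pool)      // flush and close the database
//			mainQuit <- struct{}{} // cleanup finished; main may now return
//		}()
//	}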