Exemple #1
0
// compressQueries reads the sequences in queryFileName into a new compressed
// query database and returns the directory that database was written to.
//
// The database directory is flagTempFileDir + "/temporary-compressed-query-db".
// When flagQueryDBConf is non-empty, the package-level queryDBConf is replaced
// by the configuration loaded from that file; failing to open or parse the
// file is reported through the returned error. Every later failure (creating
// the database, opening or reading the query sequences) is handed to
// handleFatalError instead of being returned.
// NOTE(review): handleFatalError presumably terminates the process on a
// non-nil error — confirm against its definition.
//
// As a side effect, queryDBConf.BlastDBSize is increased by the length of each
// sequence read.
func compressQueries(queryFileName string) (string, error) {
	// dbDirLoc, err := ioutil.TempDir(flagTempFileDir, "temporary-compressed-query-db")
	dbDirLoc := flagTempFileDir + "/temporary-compressed-query-db"

	if flagQueryDBConf != "" {
		qdbParams, err := os.Open(flagQueryDBConf)
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
		queryDBConf, err = mica.LoadDBConf(qdbParams)
		// The conf file is only needed for the load above; close it now so
		// the descriptor is not leaked. The file was opened read-only, so
		// the error from Close carries no useful information.
		qdbParams.Close()
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
	}

	db, err := mica.NewWriteDB(false, queryDBConf, dbDirLoc)
	handleFatalError("Failed to open new db", err)

	mica.Vprintln("Starting query compress workers...")
	pool := mica.StartCompressReducedWorkers(db)
	// Continue numbering from the sequences already present in the database.
	seqID := db.ComDB.NumSequences()

	seqChan, err := mica.ReadOriginalSeqs(queryFileName, []byte{})
	handleFatalError("Could not read query sequences", err)

	mica.Vprintln("Reading sequences into query database...")
	for readSeq := range seqChan {
		handleFatalError("Failed to read sequence", readSeq.Err)

		queryDBConf.BlastDBSize += uint64(readSeq.Seq.Len())

		// Wrap a copy of the sequence's fields in a ReducedSeq, the type the
		// reduced-compression pool accepts.
		redReadSeq := &mica.ReducedSeq{
			&mica.Sequence{
				Name:     readSeq.Seq.Name,
				Residues: readSeq.Seq.Residues,
				Offset:   readSeq.Seq.Offset,
				Id:       readSeq.Seq.Id,
			},
		}
		seqID = pool.CompressReduced(seqID, redReadSeq)
	}

	mica.Vprintln("Cleaning up query database...")
	mica.CleanupDB(db, &pool)
	mica.Vprintln("")

	return dbDirLoc, nil
}
Exemple #2
0
// main compresses the sequence files given on the command line into a
// database.
//
// The first positional argument (flag.Arg(0)) is the database location; every
// remaining argument is an input file passed to mica.ReadOriginalSeqs. Each
// sequence read is handed to the compression worker pool, and
// dbConf.BlastDBSize is increased by the sequence's length. Once all inputs
// are consumed, cleanup is called on the database and the pool.
func main() {
	// NOTE(review): nothing here returns after flag.Usage(); this presumably
	// relies on a custom Usage that exits the process — confirm. Otherwise
	// flag.Args()[1:] below would panic when no arguments are given.
	if flag.NArg() < 2 {
		flag.Usage()
	}

	// If both 'append' and 'overwrite' flags are set, quit because the
	// combination doesn't make sense.
	if flagAppend && flagOverwrite {
		fatalf("Both the 'append' and 'overwrite' flags are set. It does " +
			"not make sense to set both of these flags.")
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		mica.Verbose = true
	}

	// If the overwrite flag is set, remove whatever directory that may
	// already be there.
	if flagOverwrite {
		if err := os.RemoveAll(flag.Arg(0)); err != nil {
			fatalf("Could not remove existing database '%s': %s.",
				flag.Arg(0), err)
		}
	}

	// Create a new database for writing. If we're appending, we load
	// the coarse database into memory, and setup the database for writing.
	db, err := mica.NewWriteDB(flagAppend, dbConf, flag.Arg(0))
	if err != nil {
		fatalf("%s\n", err)
	}
	mica.Vprintln("")

	pool := startCompressWorkers(db)
	// Continue numbering from the sequences already in the database.
	orgSeqId := db.ComDB.NumSequences()
	mainQuit := make(chan struct{}, 0)

	// If the process is killed, try to clean up elegantly.
	// The idea is to preserve the integrity of the database.
	attachSignalHandler(db, mainQuit, &pool)

	// Start the CPU profile, if one was requested, before any input is
	// read and compressed.
	if len(flagCpuProfile) > 0 {
		f, err := os.Create(flagCpuProfile)
		if err != nil {
			fatalf("%s\n", err)
		}
		pprof.StartCPUProfile(f)
	}

	for _, arg := range flag.Args()[1:] {
		seqChan, err := mica.ReadOriginalSeqs(arg, ignoredResidues)
		if err != nil {
			log.Fatal(err)
		}
		// Set the timer only when no sequences have been numbered yet.
		if orgSeqId == 0 {
			timer = time.Now()
		}
		for readSeq := range seqChan {
			// Do a non-blocking receive to see if main needs to quit.
			select {
			case <-mainQuit:
				<-mainQuit // wait for cleanup to finish before exiting main.
				return
			default:
			}

			// Report the per-sequence error itself; the enclosing err is
			// always nil at this point and would print as "<nil>".
			if readSeq.Err != nil {
				log.Fatal(readSeq.Err)
			}
			dbConf.BlastDBSize += uint64(readSeq.Seq.Len())
			orgSeqId = pool.compress(orgSeqId, readSeq.Seq)
			verboseOutput(db, orgSeqId)

			// When a seeds limit is configured, give the seeds table a
			// chance to wipe itself every 10000 sequences and then force a
			// garbage collection.
			if flagMaxSeedsGB > 0 && orgSeqId%10000 == 0 {
				db.CoarseDB.Seeds.MaybeWipe(flagMaxSeedsGB)
				runtime.GC()
			}
		}
	}
	mica.Vprintln("\n")
	mica.Vprintf("Wrote %s.\n", mica.FileCompressed)
	mica.Vprintf("Wrote %s.\n", mica.FileIndex)

	cleanup(db, &pool)
}