Beispiel #1
0
// compressQueries builds a temporary compressed query database from the
// sequences in queryFileName and returns the directory it was written to.
//
// The database directory is flagTempFileDir + "/temporary-compressed-query-db".
// When flagQueryDBConf is non-empty, queryDBConf (declared outside this
// function) is replaced by the configuration loaded from that file before the
// database is created. For every sequence read, queryDBConf.BlastDBSize is
// increased by the sequence length.
//
// An error is returned only when the configuration file cannot be opened or
// loaded. Every other failure is handed to handleFatalError (defined
// elsewhere; presumably it aborts the program on a non-nil error — confirm).
func compressQueries(queryFileName string) (string, error) {
	dbDirLoc := flagTempFileDir + "/temporary-compressed-query-db"

	if flagQueryDBConf != "" {
		qdbParams, err := os.Open(flagQueryDBConf)
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
		queryDBConf, err = mica.LoadDBConf(qdbParams)
		// The handle is only needed while the configuration is parsed. It was
		// opened read-only, so a failure to close it is not actionable.
		qdbParams.Close()
		if err != nil {
			return "", fmt.Errorf("Failed to load query db conf: %s", err)
		}
	}

	db, err := mica.NewWriteDB(false, queryDBConf, dbDirLoc)
	handleFatalError("Failed to open new db", err)
	mica.Vprintln("Starting query compress workers...")
	pool := mica.StartCompressReducedWorkers(db)
	seqID := db.ComDB.NumSequences()

	seqChan, err := mica.ReadOriginalSeqs(queryFileName, []byte{})
	handleFatalError("Could not read query sequences", err)
	mica.Vprintln("Reading sequences into query database...")

	// Unlike the standalone compressor, no signal handler is attached here, so
	// there is no quit channel to poll between sequences.
	for readSeq := range seqChan {
		handleFatalError("Failed to read sequence", readSeq.Err)

		queryDBConf.BlastDBSize += uint64(readSeq.Seq.Len())
		// Copy the fields of the original sequence into a reduced sequence,
		// which is the type the reduced-worker pool accepts.
		redReadSeq := &mica.ReducedSeq{
			&mica.Sequence{
				Name:     readSeq.Seq.Name,
				Residues: readSeq.Seq.Residues,
				Offset:   readSeq.Seq.Offset,
				Id:       readSeq.Seq.Id,
			},
		}
		// CompressReduced returns the id to use for the next sequence.
		seqID = pool.CompressReduced(seqID, redReadSeq)
	}
	mica.Vprintln("Cleaning up query database...")
	mica.CleanupDB(db, &pool)
	mica.Vprintln("")

	return dbDirLoc, nil
}
Beispiel #2
0
// main opens the database named by the first positional argument, runs the
// queries found in the FASTA file named by the second argument against it,
// and finally passes the database to cleanup.
//
// flag.Usage is invoked when the number of positional arguments is not
// exactly two.
func main() {
	if flag.NArg() != 2 {
		flag.Usage()
	}

	// Verbose output stays on unless the quiet flag was given.
	if !flagQuiet {
		mica.Verbose = true
	}

	dbPath := flag.Arg(0)
	db, err := mica.NewReadDB(dbPath)
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", dbPath, err)
	}

	queryPath := flag.Arg(1)
	queryFile, err := os.Open(queryPath)
	if err != nil {
		fatalf("Could not open '%s' query file: %s\n", queryPath, err)
	}

	mica.Vprintln("\nProcessing Queries...")
	if runErr := processQueries(db, queryFile); runErr != nil {
		fatalf("Error processing queries: %s\n", runErr)
	}

	cleanup(db)
}
Beispiel #3
0
// main reads every sequence stored in the database named by the first
// positional argument and writes it, in FASTA format, to the file named by
// the second positional argument.
//
// flag.Usage is invoked when fewer than two positional arguments are given.
func main() {
	if flag.NArg() < 2 {
		flag.Usage()
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		mica.Verbose = true
	}

	// Open the fasta file specified for writing.
	outFasta, err := os.Create(flag.Arg(1))
	if err != nil {
		fatalf("Could not write to '%s': %s\n", flag.Arg(1), err)
	}
	fastaWriter := fasta.NewWriter(outFasta)
	fastaWriter.Asterisk = true

	// Open the existing database for reading.
	db, err := mica.NewReadDB(flag.Arg(0))
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
	}
	mica.Vprintln("")

	// Start the CPU profile after all of the data has been read.
	if len(flagCpuProfile) > 0 {
		f, err := os.Create(flagCpuProfile)
		if err != nil {
			fatalf("%s\n", err)
		}
		// StartCPUProfile fails when profiling is already enabled; report it
		// the same way as a failure to create the profile file.
		if err := pprof.StartCPUProfile(f); err != nil {
			fatalf("%s\n", err)
		}
	}

	numSeqs := db.ComDB.NumSequences()
	for orgSeqId := 0; orgSeqId < numSeqs; orgSeqId++ {
		oseq, err := db.ComDB.ReadSeq(db.CoarseDB, orgSeqId)
		if err != nil {
			fatalf("Error reading seq id '%d': %s\n", orgSeqId, err)
		}
		// A failed write is reported through the verbose printer and the loop
		// moves on to the next sequence.
		// NOTE(review): with the quiet flag set this failure is presumably not
		// printed at all — confirm that skipping sequences silently is intended.
		if err := fastaWriter.Write(oseq.FastaSeq()); err != nil {
			mica.Vprintf("Error writing seq '%s': %s\n", oseq.Name, err)
		}
	}

	cleanup(db)
	if err = fastaWriter.Flush(); err != nil {
		fatalf("%s\n", err)
	}
	if err = outFasta.Close(); err != nil {
		fatalf("%s\n", err)
	}
}
Beispiel #4
0
// main opens the database named by the first positional argument and runs
// the queries in the FASTA file named by the second argument against it,
// then passes the database to cleanup.
//
// flag.Usage is invoked when the number of positional arguments is not
// exactly two.
func main() {

	if flag.NArg() != 2 {
		flag.Usage()
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		mica.Verbose = true
	}

	db, err := mica.NewReadDB(flag.Arg(0))
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", flag.Arg(0), err)
	}

	inputFastaQueryName := flag.Arg(1)

	if flagCompressQuery {
		// The message has no formatting verbs, so it takes no operands.
		fatalf("Query compression is currently unsupported.\nExiting.\n")
		// NOTE(review): fatalf presumably terminates the program, which would
		// make the rest of this branch unreachable until the guard above is
		// removed — confirm.
		mica.Vprintln("\nProcessing queries with query-side compression...")
		err = processCompressedQueries(db, inputFastaQueryName)
		if err != nil {
			fatalf("Error processing queries with query-side compression: %s\n", err)
		}
	} else {

		mica.Vprintln("\nProcessing queries...")
		err = processQueries(db, inputFastaQueryName)
		if err != nil {
			fatalf("Error processing queries: %s\n", err)
		}
	}

	cleanup(db)
}
Beispiel #5
0
// cleanup is the end-of-run shutdown routine, executed either on SIGTERM or
// once all input sequences have been compressed. In order, it:
//
//   - stops the CPU profile if flagCpuProfile is set,
//   - writes a final "<name>.last" memory profile and memory stats file for
//     whichever of flagMemProfile / flagMemStats is set,
//   - calls pool.done() so the compression workers can finish,
//   - saves the database (a failure is reported through fatalf), and
//   - closes the database with WriteClose.
func cleanup(db *mica.DB, pool *compressPool) {
	mica.Vprintln("Cleaning up and saving.")

	if len(flagCpuProfile) != 0 {
		pprof.StopCPUProfile()
	}
	if len(flagMemProfile) != 0 {
		lastProfile := fmt.Sprintf("%s.last", flagMemProfile)
		writeMemProfile(lastProfile)
	}
	if len(flagMemStats) != 0 {
		lastStats := fmt.Sprintf("%s.last", flagMemStats)
		writeMemStats(lastStats)
	}

	// The workers must be done before the database is written out.
	pool.done()

	saveErr := db.Save()
	if saveErr != nil {
		fatalf("Could not save database: %s\n", saveErr)
	}
	db.WriteClose()
}
Beispiel #6
0
// main compresses the sequences of every FASTA file named after the first
// positional argument into the database directory named by the first
// positional argument.
//
// flag.Usage is invoked when fewer than two positional arguments are given.
// The 'append' and 'overwrite' flags are mutually exclusive; with 'overwrite'
// any existing database directory is removed first.
func main() {
	if flag.NArg() < 2 {
		flag.Usage()
	}

	// If both 'append' and 'overwrite' flags are set, quit because the
	// combination doesn't make sense.
	if flagAppend && flagOverwrite {
		fatalf("Both the 'append' and 'overwrite' flags are set. It does " +
			"not make sense to set both of these flags.")
	}

	// If the quiet flag isn't set, enable verbose output.
	if !flagQuiet {
		mica.Verbose = true
	}

	// If the overwrite flag is set, remove whatever directory that may
	// already be there.
	if flagOverwrite {
		if err := os.RemoveAll(flag.Arg(0)); err != nil {
			fatalf("Could not remove existing database '%s': %s.",
				flag.Arg(0), err)
		}
	}

	// Create a new database for writing. If we're appending, we load
	// the coarse database into memory, and setup the database for writing.
	db, err := mica.NewWriteDB(flagAppend, dbConf, flag.Arg(0))
	if err != nil {
		fatalf("%s\n", err)
	}
	mica.Vprintln("")

	pool := startCompressWorkers(db)
	orgSeqId := db.ComDB.NumSequences()
	mainQuit := make(chan struct{})

	// If the process is killed, try to clean up elegantly.
	// The idea is to preserve the integrity of the database.
	attachSignalHandler(db, mainQuit, &pool)

	// Start the CPU profile after all of the data has been read.
	if len(flagCpuProfile) > 0 {
		f, err := os.Create(flagCpuProfile)
		if err != nil {
			fatalf("%s\n", err)
		}
		// StartCPUProfile fails when profiling is already enabled; report it
		// the same way as a failure to create the profile file.
		if err := pprof.StartCPUProfile(f); err != nil {
			fatalf("%s\n", err)
		}
	}
	for _, arg := range flag.Args()[1:] {
		seqChan, err := mica.ReadOriginalSeqs(arg, ignoredResidues)
		if err != nil {
			log.Fatal(err)
		}
		// Start the timer only when the database holds no sequences yet.
		if orgSeqId == 0 {
			timer = time.Now()
		}
		for readSeq := range seqChan {
			// Do a non-blocking receive to see if main needs to quit.
			select {
			case <-mainQuit:
				<-mainQuit // wait for cleanup to finish before exiting main.
				return
			default:
			}

			// Report the error carried by the sequence itself; the err from
			// ReadOriginalSeqs above is known to be nil at this point.
			if readSeq.Err != nil {
				log.Fatal(readSeq.Err)
			}
			dbConf.BlastDBSize += uint64(readSeq.Seq.Len())
			// compress returns the id to use for the next sequence.
			orgSeqId = pool.compress(orgSeqId, readSeq.Seq)
			verboseOutput(db, orgSeqId)
			// Every 10000 sequences, give the seeds table a chance to be
			// wiped when a seeds memory limit was configured.
			if flagMaxSeedsGB > 0 && orgSeqId%10000 == 0 {
				db.CoarseDB.Seeds.MaybeWipe(flagMaxSeedsGB)
				runtime.GC()
			}
		}
	}
	mica.Vprintln("\n")
	mica.Vprintf("Wrote %s.\n", mica.FileCompressed)
	mica.Vprintf("Wrote %s.\n", mica.FileIndex)

	cleanup(db, &pool)

}
Beispiel #7
0
// main runs a two-stage BLAST search. The query is first searched against
// the coarse database; the resulting hits are expanded and written as FASTA
// into a temporary fine BLAST database; the query is then searched against
// that fine database. Unless flagNoCleanup is set the temporary database is
// removed afterwards.
//
// flag.Usage is invoked when fewer than two positional arguments are given.
// Every failure is reported through fatalf.
func main() {
	// One buffer is shared by the stages: it first receives the coarse search
	// output and is later reused for the FASTA text of the expanded hits.
	scratch := &bytes.Buffer{}

	if flag.NArg() < 2 {
		flag.Usage()
	}

	// Verbose output stays on unless the quiet flag was given.
	if !flagQuiet {
		mica.Verbose = true
	}

	query, err := getInputFasta()
	if err != nil {
		fatalf("Could not read input fasta query: %s\n", err)
	}

	dbPath := flag.Arg(0)
	db, err := mica.NewReadDB(dbPath)
	if err != nil {
		fatalf("Could not open '%s' database: %s\n", dbPath, err)
	}

	mica.Vprintln("\nBlasting query on coarse database...")
	coarseErr := blastCoarse(db, query, scratch)
	if coarseErr != nil {
		fatalf("Error blasting coarse database: %s\n", coarseErr)
	}

	mica.Vprintln("Decompressing blast hits...")
	hits, err := expandBlastHits(db, scratch)
	if err != nil {
		fatalf("%s\n", err)
	}

	// Reuse the buffer for the FASTA rendering of the expanded hits.
	scratch.Reset()
	fastaErr := writeFasta(hits, scratch)
	if fastaErr != nil {
		fatalf("Could not create FASTA input from coarse hits: %s\n", fastaErr)
	}

	// The fine database is built in a temporary directory.
	mica.Vprintln("Building fine BLAST database...")
	fineDBDir, err := makeFineBlastDB(db, scratch)
	if err != nil {
		fatalf("Could not create fine database to search on: %s\n", err)
	}

	// Rewind the query so the fine search sees it from the beginning.
	mica.Vprintln("Blasting query on fine database...")
	_, seekErr := query.Seek(0, os.SEEK_SET)
	if seekErr != nil {
		fatalf("Could not seek to start of query fasta input: %s\n", seekErr)
	}
	fineErr := blastFine(db, fineDBDir, query)
	if fineErr != nil {
		fatalf("Error blasting fine database: %s\n", fineErr)
	}

	// Remove the temporary fine database unless asked to keep it.
	if !flagNoCleanup {
		rmErr := os.RemoveAll(fineDBDir)
		if rmErr != nil {
			fatalf("Could not delete fine BLAST database: %s\n", rmErr)
		}
	}

	cleanup(db)
}
Beispiel #8
0
// processQueries runs the two-stage search for the queries in nuclQueryFile
// against db.
//
// The coarse stage searches with DIAMOND, converts the output to BLAST
// tabular form and expands the hits into full sequences. The fine stage
// writes those sequences as FASTA and searches them again: with DIAMOND when
// flagDmndFine is non-empty, otherwise with BLAST. Intermediate files and
// temporary databases are removed unless flagNoCleanup is set.
//
// An error is returned when the tabular conversion fails, the coarse output
// cannot be read or is empty, hit expansion fails, or the query file cannot
// be read for the BLAST fine search. The remaining failures are reported
// through fatalf / handleFatalError (defined elsewhere; presumably they
// abort the program — confirm).
func processQueries(db *mica.DB, nuclQueryFile *os.File) error {

	mica.Vprintln("\nBlasting with diamond query on coarse database...")
	dmndOutDaaFilename, err := dmndBlastPCoarse(db, nuclQueryFile)
	if err != nil {
		fatalf("Error blasting with diamond on coarse database: %s\n", err)
	}

	dmndOutFile, err := convertDmndToBlastTabular(dmndOutDaaFilename)
	if err != nil {
		return fmt.Errorf("Error converting diamond output to blast tabular: %s\n", err)
	}

	mica.Vprintln("Decompressing diamond hits...")
	dmndOutArr, readErr := ioutil.ReadAll(dmndOutFile)
	// The contents are in memory now (or the read failed); either way the
	// handle is no longer needed, and it must be closed before the file is
	// removed on platforms that refuse to delete open files.
	dmndOutFile.Close()

	// Remove the intermediate files before acting on the read error so they
	// do not linger when the read failed.
	if !flagNoCleanup {
		err := os.RemoveAll(dmndOutFile.Name())
		handleFatalError("Could not delete diamond output from coarse search", err)
		err = os.RemoveAll(dmndOutDaaFilename)
		handleFatalError("Could not delete diamond output from coarse search", err)
	}

	if readErr != nil {
		return fmt.Errorf("Could not read diamond output: %s", readErr)
	}
	if len(dmndOutArr) == 0 {
		return fmt.Errorf("No coarse hits. %s", "Aborting.")
	}
	mica.Vprintln("Expanding diamond hits...")
	dmndOut := bytes.NewBuffer(dmndOutArr)
	expandedSequences, err := expandDmndHits(db, dmndOut)
	if err != nil {
		return fmt.Errorf("%s\n", err)
	}

	// Write the contents of the expanded sequences to a fasta file.
	// It is then indexed using makeblastdb.
	searchBuf := new(bytes.Buffer)
	if err := writeFasta(expandedSequences, searchBuf); err != nil {
		fatalf("Could not create FASTA input from coarse hits: %s\n", err)
	}

	if flagDmndFine != "" {

		mica.Vprintln("Building fine DIAMOND database...")
		tmpFineDB, err := makeFineDmndDB(searchBuf)
		handleFatalError("Could not create fine diamond database to search on", err)

		err = dmndBlastPFine(nuclQueryFile, flagDmndFine, tmpFineDB)
		handleFatalError("Error diamond-blasting (p-search) fine database", err)

		// Delete the temporary fine database. Both paths are attempted
		// before either result is checked, so one failure does not leave
		// the other file behind.
		if !flagNoCleanup {
			errDB := os.RemoveAll(tmpFineDB)
			errDmnd := os.RemoveAll(tmpFineDB + ".dmnd")
			handleFatalError("Could not delete fine DIAMOND database", errDB)
			handleFatalError("Could not delete fine DIAMOND database", errDmnd)
		}

	} else {

		// Create the fine blast db in a temporary directory
		mica.Vprintln("Building fine BLAST database...")
		tmpFineDB, err := makeFineBlastDB(db, searchBuf)
		handleFatalError("Could not create fine blast database to search on", err)

		// Finally, run the query against the fine fasta database and pass on the
		// stdout and stderr...
		// NOTE(review): if the coarse search consumed nuclQueryFile through
		// this same handle, the read below would start at its current offset
		// rather than at the beginning of the file — confirm.
		bs, err := ioutil.ReadAll(nuclQueryFile)
		if err != nil {
			return fmt.Errorf("Could not read input fasta query: %s", err)
		}
		nuclQueryReader := bytes.NewReader(bs)

		err = blastFine(db, tmpFineDB, nuclQueryReader)
		handleFatalError("Error blasting fine database (p-search):", err)

		// Delete the temporary fine database.
		if !flagNoCleanup {
			err := os.RemoveAll(tmpFineDB)
			handleFatalError("Could not delete fine BLAST database", err)
		}
	}

	return nil
}
Beispiel #9
0
// processCompressedQueries runs the two-stage search with query-side
// compression for the query file at nuclQueryFileLoc against db.
//
// The queries are first compressed into a temporary query database. The
// coarse FASTA of that database is searched against db with DIAMOND, the
// output is converted to BLAST tabular form, and both the hit sequences and
// the queries are expanded. The expanded queries are written to a temporary
// file and searched against a fine database built from the expanded hits:
// with DIAMOND when flagDmndFine is non-empty, otherwise with BLAST.
// Intermediate files and temporary databases are removed unless
// flagNoCleanup is set.
//
// An error is returned when query compression, opening the query database,
// the coarse search, the tabular conversion, reading the coarse output,
// hit expansion, or reading the fine query file fails, and when there are no
// coarse hits. The remaining failures are reported through fatalf /
// handleFatalError (defined elsewhere; presumably they abort the program —
// confirm).
func processCompressedQueries(db *mica.DB, nuclQueryFileLoc string) error {

	queryDbLoc, err := compressQueries(nuclQueryFileLoc)
	if err != nil {
		return fmt.Errorf("Error compressing queries: %s\n", err)
	}

	queryDb, err := mica.NewReadDB(queryDbLoc)
	if err != nil {
		return fmt.Errorf("Error opening newly created compressed query db: %s\n", err)
	}

	mica.Vprintln("\nBlasting with diamond query on coarse database...")
	dmndOutDaaFilename, err := dmndBlastXCoarse(db, queryDb.CoarseDB.FileFasta.Name())
	if err != nil {
		return fmt.Errorf("Error blasting with diamond on coarse database from compressed queries: %s\n", err)
	}

	dmndOutFile, err := convertDmndToBlastTabular(dmndOutDaaFilename)
	if err != nil {
		return fmt.Errorf("Error converting diamond output to blast tabular: %s\n", err)
	}

	mica.Vprintln("Decompressing diamond hits...")
	dmndOutArr, readErr := ioutil.ReadAll(dmndOutFile)
	// The contents are in memory now (or the read failed); either way the
	// handle is no longer needed, and it must be closed before the file is
	// removed on platforms that refuse to delete open files.
	dmndOutFile.Close()

	// Remove the intermediate files before acting on the read error so they
	// do not linger when the read failed.
	if !flagNoCleanup {
		err := os.RemoveAll(dmndOutFile.Name())
		handleFatalError("Could not delete diamond output from coarse search", err)
		err = os.RemoveAll(dmndOutDaaFilename)
		handleFatalError("Could not delete diamond output from coarse search", err)
		// NOTE(review): the query database directory is removed here while
		// queryDb is still passed to expandDmndHitsAndQuery below. This
		// presumably relies on queryDb already holding everything it needs
		// open or in memory — confirm.
		err = os.RemoveAll(queryDbLoc)
		handleFatalError("Could not delete temporary compressed query database", err)
	}

	if readErr != nil {
		return fmt.Errorf("Could not read diamond output: %s", readErr)
	}
	if len(dmndOutArr) == 0 {
		return fmt.Errorf("No coarse hits. %s", "Aborting.")
	}
	mica.Vprintln("Expanding diamond hits (queries and targets)...")
	dmndOut := bytes.NewBuffer(dmndOutArr)
	expandedSequences, expandedQueries, err := expandDmndHitsAndQuery(db, queryDb, dmndOut)
	if err != nil {
		return fmt.Errorf("%s\n", err)
	}

	// Write the contents of the expanded sequences to a fasta file.
	// It is then indexed using makeblastdb.
	searchBuf := new(bytes.Buffer)
	if err := writeFasta(expandedSequences, searchBuf); err != nil {
		fatalf("Could not create FASTA input from coarse hits: %s\n", err)
	}

	qSearchBuf := new(bytes.Buffer)
	if err := writeFasta(expandedQueries, qSearchBuf); err != nil {
		fatalf("Could not create FASTA input from coarse QUERY hits: %s\n", err)
	}
	fineQueryFile, err := ioutil.TempFile(flagTempFileDir, "fine-query-sequences")
	if err != nil {
		fatalf("Could not create temporary QUERY sequence file: %s\n", err)
	}
	// Release the temporary query file once the fine search is over. Both
	// steps are best effort: nothing useful can be done if they fail, and the
	// search result has already been produced by then.
	defer func() {
		fineQueryFile.Close()
		if !flagNoCleanup {
			os.Remove(fineQueryFile.Name())
		}
	}()
	// The file is written by name; the handle above stays at offset zero so
	// the BLAST branch below can read the contents back through it.
	err = ioutil.WriteFile(fineQueryFile.Name(), qSearchBuf.Bytes(), 0666)
	if err != nil {
		fatalf("Could not write to temporary QUERY sequence file: %s\n", err)
	}

	if flagDmndFine != "" {

		mica.Vprintln("Building fine DIAMOND database...")
		tmpFineDB, err := makeFineDmndDB(searchBuf)
		handleFatalError("Could not create fine diamond database to search on", err)

		err = dmndBlastXFine(fineQueryFile.Name(), flagDmndFine, tmpFineDB)
		handleFatalError("Error diamond-blasting (x-search) fine database", err)

		// Delete the temporary fine database. Both paths are attempted
		// before either result is checked, so one failure does not leave
		// the other file behind.
		if !flagNoCleanup {
			errDB := os.RemoveAll(tmpFineDB)
			errDmnd := os.RemoveAll(tmpFineDB + ".dmnd")
			handleFatalError("Could not delete fine DIAMOND database", errDB)
			handleFatalError("Could not delete fine DIAMOND database", errDmnd)
		}

	} else {

		// Create the fine blast db in a temporary directory
		mica.Vprintln("Building fine BLAST database...")
		tmpFineDB, err := makeFineBlastDB(db, searchBuf)
		handleFatalError("Could not create fine blast database to search on", err)

		// Finally, run the expanded queries against the fine fasta database
		// and pass on the stdout and stderr...
		bs, err := ioutil.ReadAll(fineQueryFile)
		if err != nil {
			return fmt.Errorf("Could not read input fasta query: %s", err)
		}
		nuclQueryReader := bytes.NewReader(bs)

		err = blastFine(db, tmpFineDB, nuclQueryReader)
		handleFatalError("Error blasting fine database (x-search):", err)

		// Delete the temporary fine database.
		if !flagNoCleanup {
			err := os.RemoveAll(tmpFineDB)
			handleFatalError("Could not delete fine BLAST database", err)
		}
	}

	return nil
}