func (c *cmdCr) run() { numDB := 10 c.sizeDB = 1 << 30 var err error c.env, err = createEnv(c.dbfile, numDB, c.sizeDB) for lmdb.IsMapFull(err) { c.sizeDB *= 2 c.env, err = createEnv(c.dbfile, numDB, c.sizeDB) } raiseError(err) defer c.env.Close() // open feature db var sizeDB int64 = 1 << 30 c.featureEnv, err = createNoLockEnv(c.featureDbPath, numDB, sizeDB) for lmdb.IsMapFull(err) { sizeDB *= 2 c.featureEnv, err = createNoLockEnv(c.featureDbPath, numDB, sizeDB) } raiseError(err) defer c.featureEnv.Close() createDBI(c.env, "cr") snpChan := c.readSNPs() crChan := c.calculateCr(snpChan) c.load(crChan) }
func (c *cmdRead) run() { // create environment and dbi. numDB := 10 c.sizeDB = 1 << 30 var err error c.env, err = createEnv(c.dbfile, numDB, c.sizeDB) for lmdb.IsMapFull(err) { c.sizeDB *= 2 c.env, err = createEnv(c.dbfile, numDB, c.sizeDB) } raiseError(err) defer c.env.Close() var sizeDB int64 = 1 << 30 c.featureEnv, err = createNoLockEnv(c.featureDB, numDB, sizeDB) for lmdb.IsMapFull(err) { sizeDB *= 2 c.featureEnv, err = createNoLockEnv(c.featureDB, numDB, sizeDB) } raiseError(err) defer c.featureEnv.Close() createDBI(c.env, "gene") // open pileup file and read SNP. snpChan := readPileup(c.pileupFile) // group SNPs into genes. geneChan := c.groupSNPs(snpChan) c.loadGenes(geneChan) err = c.env.View(func(tx *lmdb.Txn) error { dbi, err := tx.OpenDBI("gene", 0) if err != nil { return err } cur, err := tx.OpenCursor(dbi) if err != nil { return err } count := 0 for { _, _, err := cur.Get(nil, nil, lmdb.Next) if lmdb.IsNotFound(err) { return nil } if err != nil { return err } count++ } log.Printf("Total gene: %d\n", count) return nil }) if err != nil { log.Panicln(err) } }
func (c *cmdFeat) loadFna(accessions []string, genomes [][]byte) { fn := func(txn *lmdb.Txn) error { dbi, err := txn.OpenDBI("fna", 0) if err != nil { return err } for i := range accessions { key := []byte(accessions[i]) val := genomes[i] err := txn.Put(dbi, key, val, 0) if err != nil { return err } } return nil } retry: err := c.env.Update(fn) if lmdb.IsMapFull(err) { c.sizeDB = c.sizeDB * 2 err = c.env.SetMapSize(c.sizeDB) raiseError(err) if *debug { log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0)) } goto retry } raiseError(err) }
func (c *cmdFeat) loadFeatures(features []Feature) { fn := func(txn *lmdb.Txn) error { genomeGeneMap := make(map[string][]string) for _, f := range features { genomeGeneMap[f.Genome] = append(genomeGeneMap[f.Genome], f.PatricID) } var dbi lmdb.DBI var err error dbi, err = txn.OpenDBI("genome", 0) if err != nil { return err } for genome, ids := range genomeGeneMap { key := []byte(genome) value, err := msgpack.Marshal(ids) if err != nil { return err } if err := txn.Put(dbi, key, value, 0); err != nil { return err } } dbi, err = txn.OpenDBI("feature", 0) if err != nil { return err } for _, f := range features { if f.PatricID == "" { continue } key := []byte(f.PatricID) value, err := msgpack.Marshal(f) if err != nil { return err } if err := txn.Put(dbi, key, value, 0); err != nil { return err } } return nil } retry: err := c.env.Update(fn) if lmdb.IsMapFull(err) { c.sizeDB = c.sizeDB * 2 err = c.env.SetMapSize(c.sizeDB) raiseError(err) if *debug { log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0)) } goto retry } raiseError(err) }
func (d *DiversityFilter) OpenFeatureDB(path string) { var numDB int = 10 var sizeDB int64 = 1 << 30 var err error d.featureDB, err = createNoLockEnv(path, numDB, sizeDB) for lmdb.IsMapFull(err) { sizeDB *= 2 d.featureDB, err = createNoLockEnv(path, numDB, sizeDB) } raiseError(err) }
func (c *cmdFeat) run() { // create an environment and make sure it is eventually closed. var numDB int = 10 c.sizeDB = 1 * 1024 * 1024 * 1024 var err error // create to create env c.env, err = createEnv(c.out, numDB, c.sizeDB) for lmdb.IsMapFull(err) { c.sizeDB *= 2 c.env, err = createEnv(c.out, numDB, c.sizeDB) } raiseError(err) defer c.env.Close() createDBI(c.env, "feature") createDBI(c.env, "genome") featureFileList := c.walk(".PATRIC.features.tab") for _, featureFile := range featureFileList { features := readFeatures(featureFile) c.loadFeatures(features) } err = createDBI(c.env, "fna") for lmdb.IsMapFull(err) { c.sizeDB = c.sizeDB * 2 err = c.env.SetMapSize(c.sizeDB) raiseError(err) if *debug { log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0)) } err = createDBI(c.env, "fna") } raiseError(err) fnaFileList := c.walk(".fna") getAcc := func(s string) string { return strings.Split(strings.TrimSpace(s), " ")[0] } for _, fnaFile := range fnaFileList { accessions, genomes := readFna(fnaFile, getAcc) c.loadFna(accessions, genomes) } }
// create temp db func (d *DiversityFilter) CreateTempDB(path string) { var numDB int = 10 d.sizeDB = 1 << 30 var err error d.db, err = createEnv(path, numDB, d.sizeDB) for lmdb.IsMapFull(err) { d.sizeDB *= 2 d.db, err = createEnv(path, numDB, d.sizeDB) } raiseError(err) err = createDBI(d.db, "read") raiseError(err) }
func (c *cmdMerge) run() { numDB := 10 c.sizeDB = 1 << 30 env, err := createEnv(c.dbOut, numDB, c.sizeDB) for lmdb.IsMapFull(err) { c.sizeDB *= 2 env, err = createEnv(c.dbOut, numDB, c.sizeDB) } raiseError(err) defer env.Close() createDBI(env, c.dbiName) fn := func(txn *lmdb.Txn) error { dbi, err := txn.OpenDBI(c.dbiName, 0) if err != nil { return err } kvChan := c.readAllCr() for kv := range kvChan { key, val := kv.Key, kv.Value err := txn.Put(dbi, key, val, 0) if err != nil { return err } } return nil } err = env.Update(fn) if err != nil { if *debug { log.Panicln(err) } else { log.Fatalln(err) } } }
func (d *DiversityFilter) filter(buf []*sam.Record, acc string, genome []byte) (out []*sam.Record, acc1 string, genome1 []byte) { fn := func(txn *lmdb.Txn) error { dbi, err := txn.OpenDBI("read", 0) if err != nil { return err } for _, r := range buf { key := []byte(r.Name) val, err := txn.Get(dbi, key) if err != nil { if lmdb.IsNotFound(err) { val, err = r.MarshalText() if err != nil { return err } err = txn.Put(dbi, key, val, 0) if err != nil { return err } } else { return err } } else { var mate *sam.Record = &sam.Record{} err := mate.UnmarshalText(val) raiseError(err) if r.Ref.Name() == mate.Ref.Name() { if acc != r.Ref.Name() { genome, err = d.findGenome(r, d.featureDB, "fna") raiseError(err) acc = r.Ref.Name() if *debug { log.Println(acc) } } diff1, len1 := d.Diff(r, genome) diff2, len2 := d.Diff(mate, genome) if len1 > 0 && len2 > 0 && float64(diff1+diff2)/float64(len1+len2) <= d.Cutoff { out = append(out, r) out = append(out, mate) } else { if *debug { log.Printf("%d, %d, %d, %d\n", diff1, diff2, len1, len2) } } } txn.Del(dbi, key, val) } } return nil } retry: err := d.db.Update(fn) if lmdb.IsMapFull(err) { d.sizeDB *= 2 err = d.db.SetMapSize(d.sizeDB) raiseError(err) goto retry } raiseError(err) genome1 = genome acc1 = acc return }
func main() { command := kingpin.MustParse(app.Parse(os.Args[1:])) runtime.GOMAXPROCS(*ncpu) switch command { case filterApp.FullCommand(): filtercmd := cmdFilter{ featureDB: *filterFeatureDB, bamFile: *filterBam, outFile: *filterOut, maxDistance: *filterMaxDist, mapQ: *filterMapQ, } filtercmd.run() case featApp.FullCommand(): featcmd := cmdFeat{ out: *featOut, dir: *featDir, } featcmd.run() break case readApp.FullCommand(): readcmd := cmdRead{ pileupFile: *pileupFile, dbfile: *readOut, minCover: *readMinCover, minDepth: *readMinDepth, featureDB: *readFeature, } readcmd.run() break case reportApp.FullCommand(): var sizeDB int64 = 1 << 30 numDB := 10 featureDB, err := createNoLockEnv(*reportFeatureDB, numDB, sizeDB) for lmdb.IsMapFull(err) { sizeDB *= 2 featureDB, err = createNoLockEnv(*reportFeatureDB, numDB, sizeDB) } raiseError(err) defer featureDB.Close() sizeDB = 1 << 30 resultsDB, err := createNoLockEnv(*reportResultsDB, numDB, sizeDB) for lmdb.IsMapFull(err) { sizeDB *= 2 resultsDB, err = createNoLockEnv(*reportResultsDB, numDB, sizeDB) } raiseError(err) defer resultsDB.Close() reportcmd := cmdReport{ featureDB: featureDB, resultsDB: resultsDB, prefix: *reportPrefix, maxl: *reportMaxl, } reportcmd.run() break // case report2App.FullCommand(): // featureDB := createNoLockEnv(*report2FeatureDB, 10, 0) // defer featureDB.Close() // resultsDB := createReadOnlyEnv(*report2ResultsDB, 10, 0) // defer resultsDB.Close() // reportcmd2 := cmdReport2{ // featureDB: featureDB, // resultsDB: resultsDB, // prefix: *report2Prefix, // } // reportcmd2.run() // break case covApp.FullCommand(): crcmd := cmdCr{ dbfile: *covResultsDb, codonID: *covGC, featureDbPath: *covFeatureDb, minDepth: *covMinDepth, } crcmd.run() break case mergeApp.FullCommand(): mergecmd := cmdMerge{ sampleFile: *mergeSampleFile, dbiName: *mergeDbiName, dbOut: *mergeOutDb, } mergecmd.run() break } }