Ejemplo n.º 1
0
// run executes the command: it opens the results environment (c.dbfile) and
// the feature environment (c.featureDbPath), creates the "cr" database, and
// passes the output of readSNPs through calculateCr into load.
//
// Both environments start with a 1 GiB map size that is doubled for as long
// as opening them reports a map-full error. Any other error is handed to
// raiseError (defined elsewhere in the package).
func (c *cmdCr) run() {
	numDB := 10
	var err error

	// Open the results environment, growing the map until it fits.
	c.sizeDB = 1 << 30
	c.env, err = createEnv(c.dbfile, numDB, c.sizeDB)
	for lmdb.IsMapFull(err) {
		c.sizeDB *= 2
		c.env, err = createEnv(c.dbfile, numDB, c.sizeDB)
	}
	raiseError(err)
	defer c.env.Close()

	// Open the feature environment with its own, independent size counter.
	var sizeDB int64 = 1 << 30
	c.featureEnv, err = createNoLockEnv(c.featureDbPath, numDB, sizeDB)
	for lmdb.IsMapFull(err) {
		sizeDB *= 2
		c.featureEnv, err = createNoLockEnv(c.featureDbPath, numDB, sizeDB)
	}
	raiseError(err)
	defer c.featureEnv.Close()

	// createDBI reports failure through its return value; do not continue
	// with a missing "cr" database.
	raiseError(createDBI(c.env, "cr"))

	snpChan := c.readSNPs()
	crChan := c.calculateCr(snpChan)
	c.load(crChan)
}
Ejemplo n.º 2
0
// run executes the command: it opens the output environment (c.dbfile) and
// the feature environment (c.featureDB), creates the "gene" database, loads
// the genes produced by grouping the SNPs read from c.pileupFile, and finally
// logs how many entries the "gene" database contains.
//
// Both environments start with a 1 GiB map size that is doubled for as long
// as opening them reports a map-full error. Any other error is handed to
// raiseError (defined elsewhere in the package).
func (c *cmdRead) run() {
	// Create the output environment.
	numDB := 10
	c.sizeDB = 1 << 30
	var err error
	c.env, err = createEnv(c.dbfile, numDB, c.sizeDB)
	for lmdb.IsMapFull(err) {
		c.sizeDB *= 2
		c.env, err = createEnv(c.dbfile, numDB, c.sizeDB)
	}
	raiseError(err)
	defer c.env.Close()

	// Open the feature environment with its own size counter.
	var sizeDB int64 = 1 << 30
	c.featureEnv, err = createNoLockEnv(c.featureDB, numDB, sizeDB)
	for lmdb.IsMapFull(err) {
		sizeDB *= 2
		c.featureEnv, err = createNoLockEnv(c.featureDB, numDB, sizeDB)
	}
	raiseError(err)
	defer c.featureEnv.Close()

	// createDBI reports failure through its return value; do not continue
	// with a missing "gene" database.
	raiseError(createDBI(c.env, "gene"))

	// Open the pileup file and read SNPs.
	snpChan := readPileup(c.pileupFile)
	// Group SNPs into genes and store them.
	geneChan := c.groupSNPs(snpChan)
	c.loadGenes(geneChan)

	// Count the stored genes by walking the whole "gene" database.
	err = c.env.View(func(tx *lmdb.Txn) error {
		dbi, err := tx.OpenDBI("gene", 0)
		if err != nil {
			return err
		}
		cur, err := tx.OpenCursor(dbi)
		if err != nil {
			return err
		}
		defer cur.Close()

		count := 0
		for {
			_, _, err := cur.Get(nil, nil, lmdb.Next)
			if lmdb.IsNotFound(err) {
				// End of the database: leave the loop so the total
				// below is actually reported.
				break
			}
			if err != nil {
				return err
			}
			count++
		}
		log.Printf("Total gene: %d\n", count)
		return nil
	})
	if err != nil {
		log.Panicln(err)
	}
}
Ejemplo n.º 3
0
// loadFna writes every genome into the "fna" database, keyed by the accession
// at the same index. All entries are written in a single update transaction.
//
// When the update fails because the map is full, c.sizeDB is doubled, the
// environment is resized and the whole transaction is attempted again. Any
// other error is handed to raiseError.
func (c *cmdFeat) loadFna(accessions []string, genomes [][]byte) {
	store := func(txn *lmdb.Txn) error {
		dbi, err := txn.OpenDBI("fna", 0)
		if err != nil {
			return err
		}
		for idx, acc := range accessions {
			if err := txn.Put(dbi, []byte(acc), genomes[idx], 0); err != nil {
				return err
			}
		}
		return nil
	}

	for {
		err := c.env.Update(store)
		if !lmdb.IsMapFull(err) {
			raiseError(err)
			return
		}

		// The map is full: double it and run the transaction again.
		c.sizeDB = c.sizeDB * 2
		raiseError(c.env.SetMapSize(c.sizeDB))
		if *debug {
			log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0))
		}
	}
}
Ejemplo n.º 4
0
// loadFeatures stores the given features in a single update transaction:
//
//   - "genome" maps each Feature.Genome to the msgpack-encoded list of the
//     PatricID values of all features of that genome;
//   - "feature" maps each non-empty Feature.PatricID to the msgpack-encoded
//     feature itself.
//
// When the update fails because the map is full, c.sizeDB is doubled, the
// environment is resized and the whole transaction is attempted again. Any
// other error is handed to raiseError.
func (c *cmdFeat) loadFeatures(features []Feature) {
	store := func(txn *lmdb.Txn) error {
		// Collect the feature IDs belonging to each genome.
		idsByGenome := make(map[string][]string)
		for _, feat := range features {
			idsByGenome[feat.Genome] = append(idsByGenome[feat.Genome], feat.PatricID)
		}

		genomeDBI, err := txn.OpenDBI("genome", 0)
		if err != nil {
			return err
		}
		for genome, ids := range idsByGenome {
			packed, err := msgpack.Marshal(ids)
			if err != nil {
				return err
			}
			if err := txn.Put(genomeDBI, []byte(genome), packed, 0); err != nil {
				return err
			}
		}

		featureDBI, err := txn.OpenDBI("feature", 0)
		if err != nil {
			return err
		}
		for _, feat := range features {
			// Features without an ID have no key to be stored under.
			if feat.PatricID == "" {
				continue
			}
			packed, err := msgpack.Marshal(feat)
			if err != nil {
				return err
			}
			if err := txn.Put(featureDBI, []byte(feat.PatricID), packed, 0); err != nil {
				return err
			}
		}

		return nil
	}

	for {
		err := c.env.Update(store)
		if !lmdb.IsMapFull(err) {
			raiseError(err)
			return
		}

		// The map is full: double it and run the transaction again.
		c.sizeDB = c.sizeDB * 2
		raiseError(c.env.SetMapSize(c.sizeDB))
		if *debug {
			log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0))
		}
	}
}
Ejemplo n.º 5
0
// OpenFeatureDB opens the environment at path with createNoLockEnv and stores
// it in d.featureDB. The map size starts at 1 GiB and is doubled for as long
// as opening reports a map-full error; any other error is handed to
// raiseError.
func (d *DiversityFilter) OpenFeatureDB(path string) {
	numDB := 10
	mapSize := int64(1 << 30)

	env, err := createNoLockEnv(path, numDB, mapSize)
	d.featureDB = env
	for lmdb.IsMapFull(err) {
		mapSize *= 2
		d.featureDB, err = createNoLockEnv(path, numDB, mapSize)
	}
	raiseError(err)
}
Ejemplo n.º 6
0
// run executes the command: it creates the output environment (c.out), fills
// the "feature" and "genome" databases from every ".PATRIC.features.tab" file
// returned by walk, then fills the "fna" database from every ".fna" file.
//
// The environment starts with a 1 GiB map size. Map-full errors, both while
// opening the environment and while creating a database, are answered by
// doubling c.sizeDB and trying again. Any other error is handed to raiseError
// (defined elsewhere in the package).
func (c *cmdFeat) run() {
	// Create the environment and make sure it is eventually closed.
	numDB := 10
	c.sizeDB = 1 * 1024 * 1024 * 1024
	var err error
	c.env, err = createEnv(c.out, numDB, c.sizeDB)
	for lmdb.IsMapFull(err) {
		c.sizeDB *= 2
		c.env, err = createEnv(c.out, numDB, c.sizeDB)
	}
	raiseError(err)
	defer c.env.Close()

	// mustCreateDBI creates the named database, growing the map while LMDB
	// reports it full. Every database gets the same treatment so that a
	// failed creation is never silently ignored.
	mustCreateDBI := func(name string) {
		err := createDBI(c.env, name)
		for lmdb.IsMapFull(err) {
			c.sizeDB = c.sizeDB * 2
			raiseError(c.env.SetMapSize(c.sizeDB))
			if *debug {
				log.Printf("increase max database size to %.2f G\n", float64(c.sizeDB)/(1024*1024*1024.0))
			}
			err = createDBI(c.env, name)
		}
		raiseError(err)
	}

	mustCreateDBI("feature")
	mustCreateDBI("genome")
	for _, featureFile := range c.walk(".PATRIC.features.tab") {
		features := readFeatures(featureFile)
		c.loadFeatures(features)
	}

	mustCreateDBI("fna")
	// getAcc keeps the text before the first space of a trimmed string; it
	// is passed to readFna to derive the accession.
	getAcc := func(s string) string { return strings.Split(strings.TrimSpace(s), " ")[0] }
	for _, fnaFile := range c.walk(".fna") {
		accessions, genomes := readFna(fnaFile, getAcc)
		c.loadFna(accessions, genomes)
	}
}
Ejemplo n.º 7
0
// CreateTempDB creates the environment at path, stores it in d.db and creates
// the "read" database inside it. The map size (d.sizeDB) starts at 1 GiB and
// is doubled for as long as creating the environment reports a map-full
// error; any other error is handed to raiseError.
func (d *DiversityFilter) CreateTempDB(path string) {
	numDB := 10
	d.sizeDB = 1 << 30

	env, err := createEnv(path, numDB, d.sizeDB)
	d.db = env
	for lmdb.IsMapFull(err) {
		d.sizeDB *= 2
		d.db, err = createEnv(path, numDB, d.sizeDB)
	}
	raiseError(err)

	raiseError(createDBI(d.db, "read"))
}
Ejemplo n.º 8
0
// run executes the command: it creates the output environment (c.dbOut),
// creates the database named c.dbiName and, in one update transaction, stores
// every key/value pair received from readAllCr.
//
// The environment starts with a 1 GiB map size that is doubled for as long as
// creating it reports a map-full error. A failed update panics when *debug is
// set and exits through log.Fatalln otherwise.
func (c *cmdMerge) run() {
	numDB := 10
	c.sizeDB = 1 << 30
	env, err := createEnv(c.dbOut, numDB, c.sizeDB)
	for lmdb.IsMapFull(err) {
		c.sizeDB *= 2
		env, err = createEnv(c.dbOut, numDB, c.sizeDB)
	}
	raiseError(err)
	defer env.Close()

	// createDBI reports failure through its return value; do not start the
	// merge with a missing target database.
	raiseError(createDBI(env, c.dbiName))

	fn := func(txn *lmdb.Txn) error {
		dbi, err := txn.OpenDBI(c.dbiName, 0)
		if err != nil {
			return err
		}
		for kv := range c.readAllCr() {
			if err := txn.Put(dbi, kv.Key, kv.Value, 0); err != nil {
				return err
			}
		}
		return nil
	}

	if err := env.Update(fn); err != nil {
		if *debug {
			log.Panicln(err)
		}
		log.Fatalln(err)
	}
}
Ejemplo n.º 9
0
// filter pairs the records in buf with their mates through the "read"
// database of d.db and returns the pairs that pass the diversity cutoff.
//
// For every record, keyed by its name:
//   - if no entry exists yet, the record's text form is stored and the record
//     waits for its mate;
//   - otherwise the stored entry is decoded as the mate. When both records
//     map to the same reference, d.Diff is evaluated for each against the
//     reference genome, and both records are appended to out if both lengths
//     are positive and (diff1+diff2)/(len1+len2) <= d.Cutoff. The stored
//     entry is deleted afterwards in every case.
//
// acc and genome are the reference name and genome loaded by a previous call;
// the genome is looked up again via d.findGenome only when the reference
// changes. The values in effect at the end are returned as acc1 and genome1
// so the caller can pass them to the next call.
//
// When the update fails because the map is full, d.sizeDB is doubled, the
// environment is resized and the whole transaction runs again. Any other
// error is handed to raiseError.
func (d *DiversityFilter) filter(buf []*sam.Record, acc string, genome []byte) (out []*sam.Record, acc1 string, genome1 []byte) {
	fn := func(txn *lmdb.Txn) error {
		// fn runs again from the start after a map-full abort; drop whatever
		// the aborted attempt collected so no pair is reported twice.
		out = out[:0]

		dbi, err := txn.OpenDBI("read", 0)
		if err != nil {
			return err
		}

		for _, r := range buf {
			key := []byte(r.Name)
			val, err := txn.Get(dbi, key)
			if lmdb.IsNotFound(err) {
				// First record of the pair: park it until the mate arrives.
				text, err := r.MarshalText()
				if err != nil {
					return err
				}
				if err := txn.Put(dbi, key, text, 0); err != nil {
					return err
				}
				continue
			}
			if err != nil {
				return err
			}

			// Second record of the pair: the stored entry is the mate.
			mate := &sam.Record{}
			if err := mate.UnmarshalText(val); err != nil {
				return err
			}
			if r.Ref.Name() == mate.Ref.Name() {
				if acc != r.Ref.Name() {
					// Reference changed: load its genome once and reuse it.
					genome, err = d.findGenome(r, d.featureDB, "fna")
					if err != nil {
						return err
					}
					acc = r.Ref.Name()
					if *debug {
						log.Println(acc)
					}
				}

				diff1, len1 := d.Diff(r, genome)
				diff2, len2 := d.Diff(mate, genome)
				if len1 > 0 && len2 > 0 && float64(diff1+diff2)/float64(len1+len2) <= d.Cutoff {
					out = append(out, r, mate)
				} else if *debug {
					log.Printf("%d, %d, %d, %d\n", diff1, diff2, len1, len2)
				}
			}

			// The pair is complete; a failed delete (including map-full)
			// must abort the transaction rather than go unnoticed.
			if err := txn.Del(dbi, key, val); err != nil {
				return err
			}
		}
		return nil
	}

	for {
		err := d.db.Update(fn)
		if !lmdb.IsMapFull(err) {
			raiseError(err)
			break
		}
		d.sizeDB *= 2
		raiseError(d.db.SetMapSize(d.sizeDB))
	}

	return out, acc, genome
}
Ejemplo n.º 10
0
// main parses the command line, sets GOMAXPROCS from *ncpu and runs the
// selected sub-command. An unrecognised command falls through the switch and
// does nothing.
func main() {
	selected := kingpin.MustParse(app.Parse(os.Args[1:]))
	runtime.GOMAXPROCS(*ncpu)

	switch selected {
	case filterApp.FullCommand():
		cmd := cmdFilter{
			featureDB:   *filterFeatureDB,
			bamFile:     *filterBam,
			outFile:     *filterOut,
			maxDistance: *filterMaxDist,
			mapQ:        *filterMapQ,
		}
		cmd.run()

	case featApp.FullCommand():
		cmd := cmdFeat{
			out: *featOut,
			dir: *featDir,
		}
		cmd.run()

	case readApp.FullCommand():
		cmd := cmdRead{
			pileupFile: *pileupFile,
			dbfile:     *readOut,
			minCover:   *readMinCover,
			minDepth:   *readMinDepth,
			featureDB:  *readFeature,
		}
		cmd.run()

	case reportApp.FullCommand():
		numDB := 10

		// Feature environment: start at 1 GiB and double while the map is
		// reported full.
		featureSize := int64(1 << 30)
		featureDB, err := createNoLockEnv(*reportFeatureDB, numDB, featureSize)
		for lmdb.IsMapFull(err) {
			featureSize *= 2
			featureDB, err = createNoLockEnv(*reportFeatureDB, numDB, featureSize)
		}
		raiseError(err)
		defer featureDB.Close()

		// Results environment: same strategy, starting again from 1 GiB.
		resultsSize := int64(1 << 30)
		resultsDB, err := createNoLockEnv(*reportResultsDB, numDB, resultsSize)
		for lmdb.IsMapFull(err) {
			resultsSize *= 2
			resultsDB, err = createNoLockEnv(*reportResultsDB, numDB, resultsSize)
		}
		raiseError(err)
		defer resultsDB.Close()

		cmd := cmdReport{
			featureDB: featureDB,
			resultsDB: resultsDB,
			prefix:    *reportPrefix,
			maxl:      *reportMaxl,
		}
		cmd.run()

	// Disabled sub-command, kept for reference:
	// case report2App.FullCommand():
	// 	featureDB := createNoLockEnv(*report2FeatureDB, 10, 0)
	// 	defer featureDB.Close()
	// 	resultsDB := createReadOnlyEnv(*report2ResultsDB, 10, 0)
	// 	defer resultsDB.Close()
	// 	reportcmd2 := cmdReport2{
	// 		featureDB: featureDB,
	// 		resultsDB: resultsDB,
	// 		prefix:    *report2Prefix,
	// 	}
	// 	reportcmd2.run()
	// 	break

	case covApp.FullCommand():
		cmd := cmdCr{
			dbfile:        *covResultsDb,
			codonID:       *covGC,
			featureDbPath: *covFeatureDb,
			minDepth:      *covMinDepth,
		}
		cmd.run()

	case mergeApp.FullCommand():
		cmd := cmdMerge{
			sampleFile: *mergeSampleFile,
			dbiName:    *mergeDbiName,
			dbOut:      *mergeOutDb,
		}
		cmd.run()
	}
}