예제 #1
0
파일: mk_paired.go 프로젝트: TuftsBCB/flib
// mkPaired builds a "paired" fragment library from an existing library and
// saves it to disk.
//
// The command takes exactly two arguments: the path of the input fragment
// library and the path to write the paired library to. For every ordered
// pair (i, j) of distinct fragments in the input, the output contains one
// fragment that is fragment i followed by fragment j, so an input with n
// fragments yields n*(n-1) output fragments. The output library is named
// "paired-<input name>".
//
// Weighted libraries are rejected. Structure libraries and sequence
// libraries whose tag contains "hmm" are supported; libraries whose tag
// contains "profile" are recognized but not implemented. Any failure is
// reported through the util helpers rather than returned.
func mkPaired(c *command) {
	c.assertNArg(2)

	in := util.Library(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	util.AssertOverwritable(outPath, flagOverwrite)

	if _, ok := in.(fragbag.WeightedLibrary); ok {
		util.Fatalf("%s is a weighted library (not allowed)", in.Name())
	}

	name := fmt.Sprintf("paired-%s", in.Name())
	switch {
	case fragbag.IsStructure(in):
		var pairs [][]structure.Coords
		lib := in.(fragbag.StructureLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Atoms(i), lib.Atoms(j)

				// Copy into a fresh slice. Appending f2 directly onto f1
				// could write into f1's backing array when it has spare
				// capacity, which would alias every pair that starts with
				// fragment i (and possibly the library's own data).
				pair := make([]structure.Coords, 0, len(f1)+len(f2))
				pair = append(pair, f1...)
				pair = append(pair, f2...)
				pairs = append(pairs, pair)
			}
		}
		pairLib, err := fragbag.NewStructureAtoms(name, pairs)
		util.Assert(err)
		// Save reports write/encoding failures; do not drop them silently.
		util.Assert(fragbag.Save(util.CreateFile(outPath), pairLib))
	case strings.Contains(in.Tag(), "hmm"):
		var pairs []*seq.HMM
		lib := in.(fragbag.SequenceLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM)
				pairs = append(pairs, seq.HMMCat(f1, f2))
			}
		}
		pairLib, err := fragbag.NewSequenceHMM(name, pairs)
		util.Assert(err)
		util.Assert(fragbag.Save(util.CreateFile(outPath), pairLib))
	case strings.Contains(in.Tag(), "profile"):
		util.Fatalf("Sequence profiles not implemented.")
	default:
		util.Fatalf("Unrecognized fragment library: %s", in.Tag())
	}
}
예제 #2
0
// mkStructure builds a structure fragment library from a single input file
// and saves it to disk.
//
// The command takes exactly two arguments: the input file and the output
// path. The input is read in full and split on the byte sequence "TER";
// each non-empty piece (after trimming surrounding whitespace) is turned
// into one fragment by coords. The library is named after the input file's
// base name with its extension removed (via stripExt). Any failure is
// reported through the util helpers rather than returned.
func mkStructure(c *command) {
	c.assertNArg(2)

	brkFile := c.flags.Arg(0)
	saveto := c.flags.Arg(1)

	util.AssertOverwritable(saveto, flagOverwrite)

	brkContents, err := ioutil.ReadAll(util.OpenFile(brkFile))
	util.Assert(err)

	pdbFragments := bytes.Split(brkContents, []byte("TER"))
	fragments := make([][]structure.Coords, 0, len(pdbFragments))
	for i, pdbFrag := range pdbFragments {
		pdbFrag = bytes.TrimSpace(pdbFrag)
		if len(pdbFrag) == 0 {
			continue
		}
		// i is the piece's position among all "TER"-separated pieces,
		// including the empty ones skipped above.
		fragments = append(fragments, coords(i, pdbFrag))
	}

	libName := stripExt(path.Base(brkFile))
	lib, err := fragbag.NewStructureAtoms(libName, fragments)
	util.Assert(err)
	// Save reports write/encoding failures; do not drop them silently.
	util.Assert(fragbag.Save(util.CreateFile(saveto), lib))
}
예제 #3
0
파일: db.go 프로젝트: TuftsBCB/fragbag
// Create creates a new BOW database on disk at 'fpath'. If the path
// already exists (or cannot be checked), or the file cannot be created, an
// error is returned.
//
// The database is written as a tar archive: first a directory header, then
// a copy of the given fragment library, then the entries sent to the
// database afterwards. Entries are written by a background goroutine that
// drains the database's entry channel and signals completion once that
// channel is closed.
//
// If initialization fails after the file has been created, the file is
// closed and removed so that a partial archive is not left behind.
//
// When you're finished adding entries, you must call Close.
//
// Once a BOW database is created, it cannot be modified. (This restriction
// may be lifted in the future.)
func Create(lib fragbag.Library, fpath string) (*DB, error) {
	if _, err := os.Stat(fpath); err == nil || !os.IsNotExist(err) {
		return nil, fmt.Errorf("BOW database '%s' already exists.", fpath)
	}
	outf, err := os.Create(fpath)
	if err != nil {
		return nil, err
	}

	// abort releases the file handle and deletes the partially written
	// archive. Cleanup is best-effort: the caller needs the original cause,
	// and a leftover file would make a retry fail with "already exists".
	abort := func(cause error) (*DB, error) {
		_ = outf.Close()
		_ = os.Remove(fpath)
		return nil, cause
	}

	db := &DB{
		Lib:  lib,
		Name: path.Base(fpath),

		tw:          tar.NewWriter(outf),
		saveBuf:     new(bytes.Buffer),
		writeBuf:    new(bytes.Buffer),
		entryChan:   make(chan bow.Bowed),
		writingDone: make(chan struct{}),
	}

	// Put all bow DB files in a directory within the archive.
	hdrDir := db.newHdrDir(db.dirName())
	if err := db.tw.WriteHeader(hdrDir); err != nil {
		return abort(err)
	}

	// Create an entry for the fragment library. Copy the bytes.
	// The library is serialized to memory first because the tar header
	// must carry the entry's length before its contents are written.
	flibBytes := new(bytes.Buffer)
	if err := fragbag.Save(flibBytes, db.Lib); err != nil {
		return abort(fmt.Errorf("Could not copy fragment library: %s", err))
	}
	hdr := db.newHdr(fileFragLib, flibBytes.Len())
	if err := db.tw.WriteHeader(hdr); err != nil {
		return abort(err)
	}
	if _, err := db.tw.Write(flibBytes.Bytes()); err != nil {
		return abort(err)
	}

	// Now spin up a goroutine that is responsible for writing entries.
	// It runs until entryChan is closed, then signals writingDone.
	go func() {
		for entry := range db.entryChan {
			// Keep err local to the goroutine; it must not write to a
			// variable owned by the (already returned) Create call.
			if err := db.write(entry); err != nil {
				log.Printf("Could not write to %s: %s", fileBowDB, err)
			}
		}
		db.writingDone <- struct{}{}
	}()
	return db, nil
}
예제 #4
0
// mkSeqProfile builds a sequence profile fragment library from a structure
// fragment library and a set of PDB entries, and saves it to disk.
//
// The command takes at least three arguments: the structure library, the
// output path, and one or more PDB entry paths. Each chain of each entry
// is handed to structureToSequence together with one channel per fragment
// and a channel for the null model; the resulting per-fragment frequency
// profiles are converted to profiles against the null profile and stored
// in a new library that carries the structure library's name.
func mkSeqProfile(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	pdbPaths := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	out := util.CreateFile(outPath)

	nfrags := structLib.Size()

	// One frequency profile and one feeding channel per structural fragment.
	freqProfiles := make([]*seq.FrequencyProfile, nfrags)
	fpChans := make([]chan seq.Sequence, nfrags)
	for i := range freqProfiles {
		freqProfiles[i] = seq.NewFrequencyProfile(structLib.FragmentSize())
		fpChans[i] = make(chan seq.Sequence)
	}

	// Start the consumers: one for the null model, then one per fragment,
	// each draining its channel into its own profile.
	nullChan, nullProfile := addToNull()
	for i, ch := range fpChans {
		addToProfile(ch, freqProfiles[i])
	}

	// Feed the PDB entry paths to the workers, closing the channel once
	// every path has been handed out.
	pathChan := make(chan string)
	go func() {
		for _, p := range pdbPaths {
			pathChan <- p
		}
		close(pathChan)
	}()

	progress := util.NewProgress(len(pdbPaths))

	// worker opens entries until pathChan is drained. Entries that fail to
	// open are reported to the progress tracker and skipped.
	worker := func() {
		for p := range pathChan {
			_, chains, err := util.PDBOpen(p)
			progress.JobDone(err)
			if err == nil {
				for _, chain := range chains {
					structureToSequence(structLib, chain, nullChan, fpChans)
				}
			}
		}
		wgPDBChains.Done()
	}
	for n := 0; n < flagCpu; n++ {
		wgPDBChains.Add(1)
		go worker()
	}
	wgPDBChains.Wait()
	progress.Close()

	// All PDB inputs have been read. Close the consumer channels so the
	// consumers can finish, then wait on wgSeqFragments.
	close(nullChan)
	for _, ch := range fpChans {
		close(ch)
	}
	wgSeqFragments.Wait()

	// Convert each frequency profile into a profile relative to the null
	// model, assemble the sequence library and save it.
	profs := make([]*seq.Profile, nfrags)
	for i, fp := range freqProfiles {
		profs[i] = fp.Profile(nullProfile)
	}
	lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs)
	util.Assert(err)
	util.Assert(fragbag.Save(out, lib))
}
예제 #5
0
파일: mk_seq_hmm.go 프로젝트: TuftsBCB/flib
// mkSeqHMM builds a sequence HMM fragment library from a structure
// fragment library and a set of PDB entries, and saves it to disk.
//
// The command takes at least three arguments: the structure library, the
// output path, and one or more PDB entry paths. Each chain of each entry
// is handed to structureToSequence together with one channel per fragment;
// the sequences collected for each fragment form an MSA. Every MSA is then
// written as a FASTA file into a temporary directory and passed to
// hhsuite.HHMakePseudo to produce one HMM per fragment. The resulting
// library carries the structure library's name.
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA for each structural fragment.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		// Named fragMSA so the msa package used below is not shadowed.
		fragMSA := seq.NewMSA()
		fragMSA.SetLen(structLib.FragmentSize())
		msas = append(msas, fragMSA)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself. This is a separate loop because
	// addToMSA receives a pointer into msas, which must not be taken until
	// the slice has stopped growing.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finished reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))
		// Close the FASTA file before hhmake reads it so one descriptor is
		// not leaked per fragment while the external tool runs.
		// NOTE(review): assumes util.CreateFile returns an *os.File (or
		// another io.Closer) — confirm.
		util.Assert(f.Close())

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // ParMap needs a return value; it is unused.
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}