Beispiel #1
0
func mkPaired(c *command) {
	c.assertNArg(2)

	in := util.Library(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	util.AssertOverwritable(outPath, flagOverwrite)

	if _, ok := in.(fragbag.WeightedLibrary); ok {
		util.Fatalf("%s is a weighted library (not allowed)", in.Name())
	}

	name := fmt.Sprintf("paired-%s", in.Name())
	if fragbag.IsStructure(in) {
		var pairs [][]structure.Coords
		lib := in.(fragbag.StructureLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Atoms(i), lib.Atoms(j)
				pairs = append(pairs, append(f1, f2...))
			}
		}
		pairLib, err := fragbag.NewStructureAtoms(name, pairs)
		util.Assert(err)
		fragbag.Save(util.CreateFile(outPath), pairLib)
	} else if strings.Contains(in.Tag(), "hmm") {
		var pairs []*seq.HMM
		lib := in.(fragbag.SequenceLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM)
				pairs = append(pairs, seq.HMMCat(f1, f2))
			}
		}
		pairLib, err := fragbag.NewSequenceHMM(name, pairs)
		util.Assert(err)
		fragbag.Save(util.CreateFile(outPath), pairLib)
	} else if strings.Contains(in.Tag(), "profile") {
		util.Fatalf("Sequence profiles not implemented.")
	} else {
		util.Fatalf("Unrecognized fragment library: %s", in.Tag())
	}
}
Beispiel #2
0
func mkBowDb(c *command) {
	c.assertLeastNArg(3)

	dbPath := c.flags.Arg(0)
	flib := util.Library(c.flags.Arg(1))
	bowPaths := c.flags.Args()[2:]

	util.AssertOverwritable(dbPath, flagOverwrite)

	db, err := bowdb.Create(flib, dbPath)
	util.Assert(err)

	bows := util.ProcessBowers(bowPaths, flib, false, flagCpu, util.FlagQuiet)
	for b := range bows {
		db.Add(b)
	}
	util.Assert(db.Close())
}
Beispiel #3
0
func mkWeighted(c *command) {
	c.assertLeastNArg(4)

	train := util.Library(c.flags.Arg(0))
	in := util.Library(c.flags.Arg(1))
	outPath := c.flags.Arg(2)
	bowPaths := c.flags.Args()[3:]

	util.AssertOverwritable(outPath, flagOverwrite)

	// The inverse-document-frequencies of each fragment in the "in" fragment
	// library.
	numFrags := in.Size()
	idfs := make([]float32, numFrags)
	for i := range idfs {
		idfs[i] = 1 // pseudocount
	}

	// Compute the BOWs for each bower against the training fragment lib.
	bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet)

	// Now tally the number of bowers that each fragment occurred in.
	totalBows := float32(1) // for pseudocount correction
	for bow := range bows {
		totalBows += 1
		for fragi := 0; fragi < numFrags; fragi++ {
			if bow.Bow.Freqs[fragi] > 0 {
				idfs[fragi]++
			}
		}
	}

	// Compute the IDF using the frequencies against all the BOWs.
	for i := range idfs {
		idfs[i] = float32(math.Log(float64(totalBows / idfs[i])))
	}

	// Finally, wrap the given library as a weighted library and save it.
	wlib, err := fragbag.NewWeightedTfIdf(in, idfs)
	util.Assert(err)
	fragbag.Save(util.CreateFile(outPath), wlib)
}
Beispiel #4
0
func mkSeqProfile(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Initialize a frequency and null profile for each structural fragment.
	var freqProfiles []*seq.FrequencyProfile
	var fpChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		fp := seq.NewFrequencyProfile(structLib.FragmentSize())
		freqProfiles = append(freqProfiles, fp)
		fpChans = append(fpChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	nullChan, nullProfile := addToNull()
	for i := 0; i < structLib.Size(); i++ {
		addToProfile(fpChans[i], freqProfiles[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nullChan, fpChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finishing reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	close(nullChan)
	for i := 0; i < structLib.Size(); i++ {
		close(fpChans[i])
	}
	wgSeqFragments.Wait()

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	profs := make([]*seq.Profile, structLib.Size())
	for i := 0; i < structLib.Size(); i++ {
		profs[i] = freqProfiles[i].Profile(nullProfile)
	}
	lib, err := fragbag.NewSequenceProfile(structLib.Name(), profs)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}
Beispiel #5
0
func mkSeqHMM(c *command) {
	c.assertLeastNArg(3)

	structLib := util.StructureLibrary(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	entries := c.flags.Args()[2:]

	util.AssertOverwritable(outPath, flagOverwrite)
	saveto := util.CreateFile(outPath)

	// Stores intermediate files produced by hhmake.
	tempDir, err := ioutil.TempDir("", "mk-seqlib-hmm")
	util.Assert(err, "Could not create temporary directory.")
	defer os.RemoveAll(tempDir)

	// Initialize a MSA for each structural fragment.
	var msas []seq.MSA
	var msaChans []chan seq.Sequence
	for i := 0; i < structLib.Size(); i++ {
		msa := seq.NewMSA()
		msa.SetLen(structLib.FragmentSize())
		msas = append(msas, msa)
		msaChans = append(msaChans, make(chan seq.Sequence))
	}

	// Now spin up a goroutine for each fragment that is responsible for
	// adding a sequence slice to itself.
	for i := 0; i < structLib.Size(); i++ {
		addToMSA(msaChans[i], &msas[i])
	}

	// Create a channel that sends the PDB entries given.
	entryChan := make(chan string)
	go func() {
		for _, fp := range entries {
			entryChan <- fp
		}
		close(entryChan)
	}()

	progress := util.NewProgress(len(entries))
	for i := 0; i < flagCpu; i++ {
		wgPDBChains.Add(1)
		go func() {
			for entryPath := range entryChan {
				_, chains, err := util.PDBOpen(entryPath)
				progress.JobDone(err)
				if err != nil {
					continue
				}

				for _, chain := range chains {
					structureToSequence(structLib, chain, nil, msaChans)
				}
			}
			wgPDBChains.Done()
		}()
	}
	wgPDBChains.Wait()
	progress.Close()

	// We've finishing reading all the PDB inputs. Now close the channels
	// and let the sequence fragments finish.
	for i := 0; i < structLib.Size(); i++ {
		close(msaChans[i])
	}
	wgSeqFragments.Wait()

	util.Verbosef("Building profile HMMs from MSAs...")

	// Finally, add the sequence fragments to a new sequence fragment
	// library and save.
	hmms := make([]*seq.HMM, structLib.Size())
	hhmake := func(i int) struct{} {
		fname := path.Join(tempDir, fmt.Sprintf("%d.fasta", i))
		f := util.CreateFile(fname)
		util.Assert(msa.WriteFasta(f, msas[i]))

		hhm, err := hhsuite.HHMakePseudo.Run(fname)
		util.Assert(err)
		hmms[i] = hhm.HMM
		return struct{}{} // my unifier sucks, i guess
	}
	fun.ParMap(hhmake, fun.Range(0, structLib.Size()))

	lib, err := fragbag.NewSequenceHMM(structLib.Name(), hmms)
	util.Assert(err)
	util.Assert(fragbag.Save(saveto, lib))
}