Пример #1
0
// NewFragment constructs a new fragment from a full query sequence and the
// hit from the HHR file.
//
// Since NewFragment requires access to the raw PDB alpha-carbon atoms (and
// the sequence) of the template hit, you'll also need to pass a path to the
// PDB database. (Which is a directory containing a flat list of all
// PDB files used to construct the corresponding hhblits database.) This
// database is usually located inside the 'pdb' directory contained in the
// corresponding hhsuite database. i.e., $HHLIB/data/pdb-select25/pdb
func NewFragment(
	pdbDb PDBDatabase, qs seq.Sequence, hit hhr.Hit) (Fragment, error) {

	pdbName := getTemplatePdbName(hit.Name)
	pdbEntry, err := pdb.ReadPDB(path.Join(
		pdbDb.PDB(), fmt.Sprintf("%s.pdb", pdbName)))
	if err != nil {
		pdbEntry, err = pdb.ReadPDB(path.Join(
			pdbDb.PDB(), fmt.Sprintf("%s.ent.gz", pdbName)))
		if err != nil {
			return Fragment{}, err
		}
	}

	// Load in the sequence from the PDB file using the SEQRES residues.
	ts, te := hit.TemplateStart, hit.TemplateEnd
	chain := pdbEntry.Chain(pdbName[4])
	if chain == nil {
		return Fragment{}, fmt.Errorf("Could not find chain '%c' in PDB "+
			"entry '%s'.", pdbName[4], pdbEntry.Path)
	}
	tseq := seq.Sequence{
		Name:     pdbName,
		Residues: make([]seq.Residue, te-ts+1),
	}

	// We copy here to avoid pinning pdb.Entry objects.
	copy(tseq.Residues, chain.Sequence[ts-1:te])

	frag := Fragment{
		Query:    qs.Slice(hit.QueryStart-1, hit.QueryEnd),
		Template: tseq,
		Hit:      hit,
		CaAtoms:  nil,
	}

	// We designate "corrupt" if the query/template hit regions are of
	// different length. i.e., we don't allow gaps (yet).
	// BUG(burntsushi): Fragments with gaps are marked as corrupt.
	if hit.QueryEnd-hit.QueryStart != hit.TemplateEnd-hit.TemplateStart {
		return frag, nil
	}

	// We also designate "corrupt" if there are any gaps in our alpha-carbon
	// atom list.
	atoms := chain.SequenceCaAtomSlice(ts-1, te)
	if atoms == nil {
		return frag, nil
	}

	// One again, we copy to avoid pinning memory.
	frag.CaAtoms = make([]structure.Coords, len(atoms))
	copy(frag.CaAtoms, atoms)

	return frag, nil
}
Пример #2
0
// SequenceBow is a helper function to compute a bag-of-words given a
// sequence fragment library and a query sequence.
//
// If the lib given is a weighted library, then the BOW returned will also
// be weighted.
//
// Note that this function should only be used when providing your own
// implementation of the SequenceBower interface. Otherwise, BOWs should
// be computed using the SequenceBow method of the interface.
func SequenceBow(lib fragbag.SequenceLibrary, s seq.Sequence) Bow {
	var best, uplimit int

	b := NewBow(lib.Size())
	libSize := lib.FragmentSize()
	uplimit = s.Len() - libSize
	for i := 0; i <= uplimit; i++ {
		best = lib.BestSequenceFragment(s.Slice(i, i+libSize))
		if best < 0 {
			continue
		}
		b.Freqs[best] += 1
	}
	if wlib, ok := lib.(fragbag.WeightedLibrary); ok {
		b = b.Weighted(wlib)
	}
	return b
}