// NewFragment constructs a new fragment from a full query sequence and the // hit from the HHR file. // // Since NewFragment requires access to the raw PDB alpha-carbon atoms (and // the sequence) of the template hit, you'll also need to pass a path to the // PDB database. (Which is a directory containing a flat list of all // PDB files used to construct the corresponding hhblits database.) This // database is usually located inside the 'pdb' directory contained in the // corresponding hhsuite database. i.e., $HHLIB/data/pdb-select25/pdb func NewFragment( pdbDb PDBDatabase, qs seq.Sequence, hit hhr.Hit) (Fragment, error) { pdbName := getTemplatePdbName(hit.Name) pdbEntry, err := pdb.ReadPDB(path.Join( pdbDb.PDB(), fmt.Sprintf("%s.pdb", pdbName))) if err != nil { pdbEntry, err = pdb.ReadPDB(path.Join( pdbDb.PDB(), fmt.Sprintf("%s.ent.gz", pdbName))) if err != nil { return Fragment{}, err } } // Load in the sequence from the PDB file using the SEQRES residues. ts, te := hit.TemplateStart, hit.TemplateEnd chain := pdbEntry.Chain(pdbName[4]) if chain == nil { return Fragment{}, fmt.Errorf("Could not find chain '%c' in PDB "+ "entry '%s'.", pdbName[4], pdbEntry.Path) } tseq := seq.Sequence{ Name: pdbName, Residues: make([]seq.Residue, te-ts+1), } // We copy here to avoid pinning pdb.Entry objects. copy(tseq.Residues, chain.Sequence[ts-1:te]) frag := Fragment{ Query: qs.Slice(hit.QueryStart-1, hit.QueryEnd), Template: tseq, Hit: hit, CaAtoms: nil, } // We designate "corrupt" if the query/template hit regions are of // different length. i.e., we don't allow gaps (yet). // BUG(burntsushi): Fragments with gaps are marked as corrupt. if hit.QueryEnd-hit.QueryStart != hit.TemplateEnd-hit.TemplateStart { return frag, nil } // We also designate "corrupt" if there are any gaps in our alpha-carbon // atom list. atoms := chain.SequenceCaAtomSlice(ts-1, te) if atoms == nil { return frag, nil } // One again, we copy to avoid pinning memory. frag.CaAtoms = make([]structure.Coords, len(atoms)) copy(frag.CaAtoms, atoms) return frag, nil }
// SequenceBow is a helper function to compute a bag-of-words given a // sequence fragment library and a query sequence. // // If the lib given is a weighted library, then the BOW returned will also // be weighted. // // Note that this function should only be used when providing your own // implementation of the SequenceBower interface. Otherwise, BOWs should // be computed using the SequenceBow method of the interface. func SequenceBow(lib fragbag.SequenceLibrary, s seq.Sequence) Bow { var best, uplimit int b := NewBow(lib.Size()) libSize := lib.FragmentSize() uplimit = s.Len() - libSize for i := 0; i <= uplimit; i++ { best = lib.BestSequenceFragment(s.Slice(i, i+libSize)) if best < 0 { continue } b.Freqs[best] += 1 } if wlib, ok := lib.(fragbag.WeightedLibrary); ok { b = b.Weighted(wlib) } return b }