// NewFragment constructs a new fragment from a full query sequence and the // hit from the HHR file. // // Since NewFragment requires access to the raw PDB alpha-carbon atoms (and // the sequence) of the template hit, you'll also need to pass a path to the // PDB database. (Which is a directory containing a flat list of all // PDB files used to construct the corresponding hhblits database.) This // database is usually located inside the 'pdb' directory contained in the // corresponding hhsuite database. i.e., $HHLIB/data/pdb-select25/pdb func NewFragment( pdbDb PDBDatabase, qs seq.Sequence, hit hhr.Hit) (Fragment, error) { pdbName := getTemplatePdbName(hit.Name) pdbEntry, err := pdb.ReadPDB(path.Join( pdbDb.PDB(), fmt.Sprintf("%s.pdb", pdbName))) if err != nil { pdbEntry, err = pdb.ReadPDB(path.Join( pdbDb.PDB(), fmt.Sprintf("%s.ent.gz", pdbName))) if err != nil { return Fragment{}, err } } // Load in the sequence from the PDB file using the SEQRES residues. ts, te := hit.TemplateStart, hit.TemplateEnd chain := pdbEntry.Chain(pdbName[4]) if chain == nil { return Fragment{}, fmt.Errorf("Could not find chain '%c' in PDB "+ "entry '%s'.", pdbName[4], pdbEntry.Path) } tseq := seq.Sequence{ Name: pdbName, Residues: make([]seq.Residue, te-ts+1), } // We copy here to avoid pinning pdb.Entry objects. copy(tseq.Residues, chain.Sequence[ts-1:te]) frag := Fragment{ Query: qs.Slice(hit.QueryStart-1, hit.QueryEnd), Template: tseq, Hit: hit, CaAtoms: nil, } // We designate "corrupt" if the query/template hit regions are of // different length. i.e., we don't allow gaps (yet). // BUG(burntsushi): Fragments with gaps are marked as corrupt. if hit.QueryEnd-hit.QueryStart != hit.TemplateEnd-hit.TemplateStart { return frag, nil } // We also designate "corrupt" if there are any gaps in our alpha-carbon // atom list. atoms := chain.SequenceCaAtomSlice(ts-1, te) if atoms == nil { return frag, nil } // One again, we copy to avoid pinning memory. frag.CaAtoms = make([]structure.Coords, len(atoms)) copy(frag.CaAtoms, atoms) return frag, nil }
func createChains(pdbFiles []string) []*pdb.Chain { chains := make([]*pdb.Chain, 0, len(pdbFiles)) for _, pdbFile := range pdbFiles { entry, err := pdb.ReadPDB(pdbFile) util.Warning(err, "Could not open PDB file '%s'", pdbFile) for _, chain := range entry.Chains { if !chain.IsProtein() { continue } chains = append(chains, chain) } } return chains }
func PDBOpen(fpath string) (*pdb.Entry, []*pdb.Chain, error) { pdbNameParse := func(fpath string) (string, []byte, string) { dir, base := path.Dir(fpath), path.Base(fpath) pieces := strings.Split(base, ":") var idents []byte base = pieces[0] if len(pieces) > 2 { Fatalf("Too many colons in PDB file path '%s'.", fpath) } else if len(pieces) == 2 { chains := strings.Split(pieces[1], ",") idents = make([]byte, len(chains)) for i := range chains { if len(chains[i]) > 1 { Fatalf("Chain '%s' is more than one character.", chains[i]) } idents[i] = byte(chains[i][0]) } } else if len(base) == 5 { // special case for '{pdb-id}{chain-id}' idents = []byte{base[4]} base = base[0:4] } if dir == "." { switch len(base) { case 4: return PDBPath(base), idents, base case 6: return CathPath(base), idents, base case 7: if base[0] == 'd' { return ScopPath(base), idents, base } else { return CathPath(base), idents, base } } } return path.Join(dir, base), idents, "" } fp, idents, idcode := pdbNameParse(fpath) entry, err := pdb.ReadPDB(fp) if err != nil { err = fmt.Errorf("Error reading '%s': %s", fp, err) return nil, nil, err } if len(idcode) > 0 { if len(idcode) == 6 || (len(idcode) == 7 && idcode[0] != 'd') { entry.Cath = idcode } else if len(idcode) == 7 && idcode[0] == 'd' { entry.Scop = idcode } } var chains []*pdb.Chain if len(idents) == 0 { chains = entry.Chains } else { chains = make([]*pdb.Chain, 0, 5) for _, c := range idents { chain := entry.Chain(c) if chain == nil { Warnf("Chain '%c' does not exist for '%s'.", c, entry.IdCode) continue } chains = append(chains, chain) } } return entry, chains, nil }
func PDBRead(path string) *pdb.Entry { entry, err := pdb.ReadPDB(path) Assert(err, "Could not open PDB file '%s'", path) return entry }