func aminoFromStructure(chain *pdb.Chain) seq.Sequence { var name string if len(chain.Entry.Cath) > 0 { name = chain.Entry.Cath } else if len(chain.Entry.Scop) > 0 { name = chain.Entry.Scop } else { name = fmt.Sprintf("%s%c", chain.Entry.IdCode, chain.Ident) } s := seq.Sequence{ Name: name, Residues: make([]seq.Residue, 0, 50), } lasti := 0 for _, r := range chain.Models[0].Residues { if lasti != r.SequenceNum { s.Residues = append(s.Residues, r.Name) lasti = r.SequenceNum } } return s }
func main() { pdbEntry := util.PDBRead(flag.Arg(0)) fasEntries := make([]seq.Sequence, 0, 5) if !flagSeparateChains { var fasEntry seq.Sequence if len(pdbEntry.Chains) == 1 { fasEntry.Name = chainHeader(pdbEntry.OneChain()) } else { fasEntry.Name = fmt.Sprintf("%s", strings.ToLower(pdbEntry.IdCode)) } seq := make([]seq.Residue, 0, 100) for _, chain := range pdbEntry.Chains { if isChainUsable(chain) { seq = append(seq, chain.Sequence...) } } fasEntry.Residues = seq if len(fasEntry.Residues) == 0 { util.Fatalf("Could not find any amino acids.") } fasEntries = append(fasEntries, fasEntry) } else { for _, chain := range pdbEntry.Chains { if !isChainUsable(chain) { continue } fasEntry := seq.Sequence{ Name: chainHeader(chain), Residues: chain.Sequence, } fasEntries = append(fasEntries, fasEntry) } } if len(fasEntries) == 0 { util.Fatalf("Could not find any chains with amino acids.") } var fasOut io.Writer if flag.NArg() == 1 { fasOut = os.Stdout } else { if len(flagSplit) > 0 { util.Fatalf("The '--split' option is incompatible with a single " + "output file.") } fasOut = util.CreateFile(util.Arg(1)) } if len(flagSplit) == 0 { util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries), "Could not write FASTA file '%s'", fasOut) } else { for _, entry := range fasEntries { fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name)) out := util.CreateFile(fp) w := fasta.NewWriter(out) util.Assert(w.Write(entry), "Could not write to '%s'", fp) util.Assert(w.Flush(), "Could not write to '%s'", fp) } } }
// ReadSequence is exported for use in other packages that read FASTA-like // files. // // The 'translate' function is used when sequences are checked for valid // characters. // // If you're just reading FASTA files, this method SHOULD NOT be used. func (r *Reader) ReadSequence(translate Translator) (seq.Sequence, error) { s := seq.Sequence{} seenHeader := false // Before entering the main loop, we have to check to see if we've // already read this entry's header. if r.nextHeader != nil { s.Name = trimHeader(r.nextHeader) r.nextHeader = nil seenHeader = true } for { line, err := r.buf.ReadBytes('\n') if err == io.EOF { if len(line) == 0 { return s, io.EOF } } else if err != nil { return seq.Sequence{}, fmt.Errorf("Error on line %d: %s", r.line, err) } line = bytes.TrimSpace(line) // If it's empty, increment the counter and skip ahead. if len(line) == 0 { r.line++ continue } // If the line starts with PIR junk, ignore the line. if bytes.HasPrefix(line, []byte("C;")) || bytes.HasPrefix(line, []byte("structure")) || bytes.HasPrefix(line, []byte("sequence")) { r.line++ continue } // If we haven't seen the header yet, this better be it. if !seenHeader { if line[0] != '>' { return seq.Sequence{}, fmt.Errorf("Expected '>', got '%c' on line %d.", line[0], r.line) } // Trim the '>' and load this line into the header. s.Name = trimHeader(line) seenHeader = true r.line++ continue } else if line[0] == '>' { // This means we've begun reading the next entry. // So slap this line into 'nextHeader' and return the current entry. r.nextHeader = line r.line++ return s, nil } // Finally, time to start reading the sequence. // If we trust the sequences, then we can just append this line // willy nilly. Otherwise we've got to check each character. if s.Residues == nil { s.Residues = make([]seq.Residue, 0, 50) } if r.TrustSequences { for _, b := range line { s.Residues = append(s.Residues, seq.Residue(b)) } } else { for _, b := range line { bNew, ok := translate(b) if !ok { return seq.Sequence{}, fmt.Errorf("Invalid character '%c' on line %d.", b, r.line) } // If the zero byte is returned from translate, then we // don't keep this residue around. if bNew > 0 { s.Residues = append(s.Residues, bNew) } } } r.line++ } panic("unreachable") }