Пример #1
0
// main reads the PDB file named by the first positional argument and writes
// the amino acid sequences of its usable chains in FASTA format.
//
// Unless flagSeparateChains is set, the residues of every usable chain are
// concatenated into a single entry. That entry is named by chainHeader when
// the PDB entry has exactly one chain, and by the lower-cased ID code
// otherwise. When flagSeparateChains is set, each usable chain becomes its
// own entry.
//
// Output goes to stdout when exactly one positional argument is given, and to
// the file named by the second argument otherwise. When flagSplit is
// non-empty, each entry is instead written to "<name>.fasta" inside the
// flagSplit directory; combining flagSplit with an explicit output file is a
// fatal error.
func main() {
	pdbEntry := util.PDBRead(flag.Arg(0))

	fasEntries := make([]seq.Sequence, 0, 5)
	if !flagSeparateChains {
		var fasEntry seq.Sequence
		if len(pdbEntry.Chains) == 1 {
			fasEntry.Name = chainHeader(pdbEntry.OneChain())
		} else {
			fasEntry.Name = strings.ToLower(pdbEntry.IdCode)
		}

		// Deliberately not called 'seq', so the seq package stays visible
		// for the remainder of this scope.
		residues := make([]seq.Residue, 0, 100)
		for _, chain := range pdbEntry.Chains {
			if isChainUsable(chain) {
				residues = append(residues, chain.Sequence...)
			}
		}
		fasEntry.Residues = residues

		if len(fasEntry.Residues) == 0 {
			util.Fatalf("Could not find any amino acids.")
		}
		fasEntries = append(fasEntries, fasEntry)
	} else {
		for _, chain := range pdbEntry.Chains {
			if !isChainUsable(chain) {
				continue
			}

			fasEntry := seq.Sequence{
				Name:     chainHeader(chain),
				Residues: chain.Sequence,
			}
			fasEntries = append(fasEntries, fasEntry)
		}
	}
	if len(fasEntries) == 0 {
		util.Fatalf("Could not find any chains with amino acids.")
	}

	// Track a printable name for the destination so that a write failure can
	// be reported meaningfully; the writer itself has no useful %s form.
	var fasOut io.Writer
	outName := "stdout"
	if flag.NArg() == 1 {
		fasOut = os.Stdout
	} else {
		if len(flagSplit) > 0 {
			util.Fatalf("The '--split' option is incompatible with a single " +
				"output file.")
		}
		outName = util.Arg(1)
		fasOut = util.CreateFile(outName)
	}

	if len(flagSplit) == 0 {
		util.Assert(fasta.NewWriter(fasOut).WriteAll(fasEntries),
			"Could not write FASTA file '%s'", outName)
	} else {
		for _, entry := range fasEntries {
			fp := path.Join(flagSplit, fmt.Sprintf("%s.fasta", entry.Name))
			out := util.CreateFile(fp)

			w := fasta.NewWriter(out)
			util.Assert(w.Write(entry), "Could not write to '%s'", fp)
			util.Assert(w.Flush(), "Could not write to '%s'", fp)

			// Close each per-entry file as soon as it is flushed so that
			// descriptors do not pile up across entries.
			// NOTE(review): assumes util.CreateFile returns an *os.File (or
			// another io.Closer) — confirm against the util package.
			util.Assert(out.Close(), "Could not write to '%s'", fp)
		}
	}
}
Пример #2
0
// ReadSequence reads the next entry from the underlying buffer. It is
// exported so that other packages reading FASTA-like files can reuse it.
//
// An entry is a header line starting with '>' followed by residue lines, and
// it ends at the next header line or at the end of input. Blank lines and
// lines starting with "C;", "structure" or "sequence" (PIR junk) are skipped.
//
// Unless the reader's TrustSequences field is set, every residue byte is
// passed through 'translate'. A byte that 'translate' rejects aborts the read
// with an error, and a byte translated to zero is dropped.
//
// When the input is exhausted, io.EOF is returned together with whatever has
// been accumulated so far. All other errors are returned with an empty
// sequence.
//
// If you're just reading FASTA files, this method SHOULD NOT be used.
func (r *Reader) ReadSequence(translate Translator) (seq.Sequence, error) {
	var (
		entry    seq.Sequence
		haveName bool
	)

	// A previous call may already have consumed this entry's header while
	// looking for the end of the entry before it.
	if r.nextHeader != nil {
		entry.Name = trimHeader(r.nextHeader)
		r.nextHeader = nil
		haveName = true
	}

	// Line prefixes that mark PIR junk.
	junkPrefixes := [][]byte{
		[]byte("C;"),
		[]byte("structure"),
		[]byte("sequence"),
	}

	for {
		raw, err := r.buf.ReadBytes('\n')
		switch {
		case err == io.EOF && len(raw) == 0:
			return entry, io.EOF
		case err != nil && err != io.EOF:
			return seq.Sequence{}, fmt.Errorf("Error on line %d: %s",
				r.line, err)
		}
		// An io.EOF that still carried data falls through: the final line
		// simply had no trailing newline.
		text := bytes.TrimSpace(raw)

		// Blank lines and PIR junk only advance the line counter.
		skip := len(text) == 0
		for i := 0; !skip && i < len(junkPrefixes); i++ {
			skip = bytes.HasPrefix(text, junkPrefixes[i])
		}
		if skip {
			r.line++
			continue
		}

		isHeader := text[0] == '>'
		switch {
		case !haveName && !isHeader:
			// The first meaningful line of an entry must be its header.
			return seq.Sequence{},
				fmt.Errorf("Expected '>', got '%c' on line %d.",
					text[0], r.line)
		case !haveName:
			entry.Name = trimHeader(text)
			haveName = true

			r.line++
			continue
		case isHeader:
			// This header belongs to the next entry: stash it for the next
			// call and hand back what we have.
			r.nextHeader = text

			r.line++
			return entry, nil
		}

		// Anything else is residue data for the current entry.
		if entry.Residues == nil {
			entry.Residues = make([]seq.Residue, 0, 50)
		}
		trusted := r.TrustSequences
		for _, b := range text {
			if trusted {
				entry.Residues = append(entry.Residues, seq.Residue(b))
				continue
			}

			residue, ok := translate(b)
			if !ok {
				return seq.Sequence{},
					fmt.Errorf("Invalid character '%c' on line %d.",
						b, r.line)
			}

			// A zero result from translate means "drop this byte".
			if residue > 0 {
				entry.Residues = append(entry.Residues, residue)
			}
		}
		r.line++
	}
}