Пример #1
0
Файл: gff.go Проект: frogs/biogo
// metaSequence reads the body of an embedded sequence block made of "##"
// prefixed lines and returns it as a sequence named id.
//
// Lines are consumed from self.r until a line whose content, after the "##"
// prefix and surrounding whitespace are removed, equals "end-"+moltype.
// Whitespace inside each body line is dropped and the remaining bytes are
// concatenated. Blank lines are skipped. A non-blank line that does not
// start with "##" yields a "Corrupt metasequence" error carrying the raw
// line. Any read error encountered before the terminator is returned as is.
func (self *Reader) metaSequence(moltype, id string) (sequence *seq.Seq, err error) {
	var body []byte
	terminator := "end-" + moltype

	for {
		// ReadBytes keeps the trailing '\n' and may hand back data together
		// with an error (an unterminated final line), so the data is always
		// examined before the error is acted upon.
		line, rerr := self.r.ReadBytes('\n')

		// Strip the line terminator first; otherwise a blank line is never
		// seen as empty and "\r\n" endings are never recognised.
		if text := bytes.TrimRight(line, "\r\n"); len(text) > 0 {
			if !bytes.HasPrefix(text, []byte("##")) {
				return nil, bio.NewError("Corrupt metasequence", 0, line)
			}
			text = bytes.TrimSpace(text[2:])
			if string(text) == terminator {
				break
			}
			body = append(body, bytes.Join(bytes.Fields(text), nil)...)
		}

		if rerr != nil {
			return nil, rerr
		}
	}

	sequence = seq.New(id, body, nil)
	sequence.Moltype = bio.ParseMoltype(moltype)

	return sequence, nil
}
Пример #2
0
// Read reads a single fastq record and returns it, or an error.
//
// A record is assembled from: a line starting with '@' (the label), one or
// more sequence lines, a line starting with '+' (optionally repeating the
// label), and quality lines which are accumulated until the quality is at
// least as long as the sequence. Blank lines are ignored and whitespace
// inside sequence and quality lines is removed.
//
// An error is returned when the '+' line is seen before any label, when the
// label on the '+' line differs from the '@' label, when the quality and
// sequence lengths differ, or when the underlying reader fails (including
// end of input) before a record is complete. The returned sequence is nil
// whenever err is non-nil.
//
// TODO: Does not read interleaved fastq.
func (self *Reader) Read() (sequence *seq.Seq, err error) {
	var label, seqBody, qualBody []byte
	inQual := false

READ:
	for {
		// ReadBytes may return data together with an error when the final
		// line has no trailing newline; process that data before looking at
		// the error so the last record of such a file is not lost.
		line, rerr := self.r.ReadBytes('\n')

		// TrimSpace also removes the "\n" / "\r\n" line terminator.
		if line = bytes.TrimSpace(line); len(line) > 0 {
			switch {
			case inQual:
				// Quality lines may legitimately start with '@' or '+', so
				// this case must be tested before the marker characters.
				qualBody = append(qualBody, bytes.Join(bytes.Fields(line), nil)...)
				if len(qualBody) >= len(seqBody) {
					break READ
				}
			case line[0] == '@':
				label = line[1:]
			case line[0] == '+':
				if len(label) == 0 {
					return nil, bio.NewError("No ID line parsed at +line in fastq format", 0)
				}
				if len(line) > 1 && !bytes.Equal(label, line[1:]) {
					return nil, bio.NewError("Quality ID does not match sequence ID", 0)
				}
				inQual = true
			default:
				seqBody = append(seqBody, bytes.Join(bytes.Fields(line), nil)...)
			}
		}

		if rerr != nil {
			// Record incomplete: report the read error with no sequence.
			return nil, rerr
		}
	}

	if len(seqBody) != len(qualBody) {
		return nil, bio.NewError("Quality length does not match sequence length", 0)
	}

	labelString := string(label)
	quality := seq.NewQuality(labelString, self.decodeQuality(qualBody))

	return seq.New(labelString, seqBody, quality), nil
}
Пример #3
0
// Read reads a single fasta sequence and returns it, or an error.
//
// Lines starting with self.IDPrefix begin a new record; lines starting with
// self.SeqPrefix contribute sequence data, with the prefix and all
// whitespace removed. Blank lines are ignored. Because a record only ends
// when the next ID line (or end of input) is seen, the label of the
// following record is kept in self.last between calls.
//
// io.EOF is returned once the input is exhausted and no record is pending.
// Any other read error is returned unchanged. A record with neither label
// nor body yields a "fasta: empty sequence" error carrying the current line
// count.
func (self *Reader) Read() (sequence *seq.Seq, err error) {
	var label, body []byte

READ:
	for {
		line, rerr := self.r.ReadBytes('\n')
		if rerr != nil && rerr != io.EOF {
			// A genuine I/O failure must not be mistaken for end of input.
			return nil, rerr
		}

		// On io.EOF ReadBytes may still return an unterminated final line;
		// it is processed like any other so its data is not lost.
		if len(line) > 0 {
			self.line++
			// TrimSpace also removes the "\n" / "\r\n" line terminator.
			line = bytes.TrimSpace(line)
			switch {
			case len(line) == 0:
				// Blank line: nothing to do.
			case bytes.HasPrefix(line, self.IDPrefix):
				id := line[len(self.IDPrefix):]
				if self.last != nil {
					// Entering a new sequence: emit the pending record and
					// remember the new label for the next call.
					label, self.last = self.last, id
					break READ
				}
				self.last = id
			case bytes.HasPrefix(line, self.SeqPrefix):
				body = append(body, bytes.Join(bytes.Fields(line[len(self.SeqPrefix):]), nil)...)
			}
		}

		if rerr == io.EOF {
			if self.last == nil {
				return nil, io.EOF
			}
			// Input exhausted: flush the pending record.
			label, self.last = self.last, nil
			break READ
		}
	}

	if len(label) == 0 && len(body) == 0 {
		return nil, bio.NewError("fasta: empty sequence", 0, self.line)
	}

	return seq.New(string(label), body, nil), nil
}