Esempio n. 1
0
File: fastq.go Progetto: gingi/Shock
// Read parses the next FASTQ record from the underlying reader and returns
// it, or a nil sequence and an error.
//
// A record is an '@' label line, one or more sequence lines, a '+' separator
// line (optionally repeating the label) and quality lines. Blank lines are
// skipped and whitespace inside sequence and quality lines is removed.
// Quality lines are accumulated until they are at least as long as the
// sequence; the two lengths must then match exactly.
//
// The last quality line of the input does not need a trailing newline: data
// returned together with a read error is still used if it completes the
// record. In every other case the read error (typically io.EOF) is returned
// unchanged.
//
// TODO: Does not read interleaved fastq.
func (self *Reader) Read() (sequence *seq.Seq, err error) {
	var label, seqBody, qualBody []byte
	inQual := false

	for {
		// ReadBytes may hand back data together with an error when the input
		// ends without a newline, so the line is examined before the error.
		line, rerr := self.r.ReadBytes('\n')
		// TrimSpace removes the trailing "\n" or "\r\n" as well as padding.
		line = bytes.TrimSpace(line)

		// Only a quality line can complete a record, so a failed read that
		// did not deliver one ends the call with the reader's error.
		if rerr != nil && (!inQual || len(line) == 0) {
			return nil, rerr
		}
		if len(line) == 0 {
			continue
		}

		switch {
		case inQual:
			// Inside the quality section every line is quality data, even
			// when it starts with '@' or '+'.
			qualBody = append(qualBody, bytes.Join(bytes.Fields(line), nil)...)
			if len(qualBody) >= len(seqBody) {
				if len(qualBody) != len(seqBody) {
					return nil, errors.New("Quality length does not match sequence length")
				}
				return seq.New(label, seqBody, qualBody), nil
			}
			if rerr != nil {
				// The input ended before the quality string was complete.
				return nil, rerr
			}
		case line[0] == '@':
			label = line[1:]
		case line[0] == '+':
			if len(label) == 0 {
				return nil, errors.New("No ID line parsed at +line in fastq format")
			}
			// A bare "+" is accepted; a repeated label must match the '@' line.
			if len(line) > 1 && !bytes.Equal(label, line[1:]) {
				return nil, errors.New("Quality ID does not match sequence ID")
			}
			inQual = true
		default:
			seqBody = append(seqBody, bytes.Join(bytes.Fields(line), nil)...)
		}
	}
}
Esempio n. 2
0
File: fasta.go Progetto: gingi/Shock
// Read parses the next FASTA record from the underlying reader and returns
// it, or a nil sequence and an error.
//
// Records are delimited by '>'. Within a record the text up to the first
// newline is the label and all remaining lines, concatenated without
// separators, form the body. A trailing carriage return is stripped from
// every line so CRLF input does not leak '\r' into the label or body. The
// final record is read in full whether or not the input ends with a newline.
//
// Read returns io.EOF when no further record is available, the reader's own
// error when a read fails for any other reason, and an "Invalid fasta entry"
// error when a record has an empty label or an empty body.
func (self *Reader) Read() (sequence *seq.Seq, err error) {
	var label, body []byte
	for {
		read, rerr := self.r.ReadBytes('>')
		if rerr != nil && rerr != io.EOF {
			// A genuine I/O failure: do not parse the partial data and do
			// not disguise the failure as end of input.
			return nil, rerr
		}
		if len(read) > 1 {
			if rerr == nil {
				// Assumes bufio.Reader semantics: a nil error means the data
				// ends with the '>' that opens the next record; drop it.
				read = read[:len(read)-1]
			}
			lines := bytes.Split(read, []byte{'\n'})
			if len(lines) > 1 {
				label = bytes.TrimRight(lines[0], "\r")
				for _, line := range lines[1:] {
					body = append(body, bytes.TrimRight(line, "\r")...)
				}
			}
			break
		}
		if rerr != nil {
			return nil, io.EOF
		}
		// Otherwise this was just the '>' that opens a record (typically the
		// first one); keep reading to reach its content.
	}
	if len(label) == 0 || len(body) == 0 {
		return nil, errors.New("Invalid fasta entry")
	}
	return seq.New(label, body, nil), nil
}
Esempio n. 3
0
// Read parses the next SAM alignment line from the underlying reader and
// returns it, or a nil sequence and an error.
//
// Blank lines and header lines (those starting with '@') are skipped. An
// alignment line must contain at least 11 tab-separated fields; the first
// field is used as the label and the whole trimmed line as the body.
//
// The last alignment line of the input does not need a trailing newline:
// data returned together with a read error is still used when it forms a
// complete alignment. Otherwise the read error (typically io.EOF) is
// returned unchanged.
func (self *Reader) Read() (sequence *seq.Seq, err error) {
	for {
		// ReadBytes may hand back data together with an error when the input
		// ends without a newline, so the line is examined before the error.
		line, rerr := self.r.ReadBytes('\n')
		// TrimSpace removes the trailing "\n" or "\r\n" as well as padding.
		line = bytes.TrimSpace(line)

		if len(line) > 0 && line[0] != '@' {
			fields := bytes.Split(line, []byte{'\t'})
			if len(fields) >= 11 {
				return seq.New(fields[0], line, nil), nil
			}
			if rerr == nil {
				return nil, errors.New("sam alignment fields less than 11")
			}
			// A short line that arrived with a read error may simply be
			// truncated; report the read error below instead.
		}
		if rerr != nil {
			return nil, rerr
		}
	}
}