Example #1
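// commentMetaline parses a "##" metadata line, updating the Reader's metadata
// fields as a side effect and returning any feature derived from the line,
// such as the region described by a sequence-region line. Metadata lines that
// are not recognised return ErrNotHandled.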
func (r *Reader) commentMetaline(line []byte) (f feat.Feature, err error) {
	if len(line) == 0 {
		return nil, &csv.ParseError{Line: r.line, Err: ErrEmptyMetaLine}
	}
	fields := bytes.Split(line, []byte{' '})
	switch unsafeString(fields[0]) {
	case "gff-version":
		v := mustAtoi(fields, 1, r.line)
		if v > Version {
			return nil, &csv.ParseError{Line: r.line, Err: ErrNotHandled}
		}
		r.Version = v
		return r.Read()
	case "source-version":
		if len(fields) <= 1 {
			return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine}
		}
		r.SourceVersion = string(bytes.Join(fields[1:], []byte{' '}))
		return r.Read()
	case "date":
		if len(fields) <= 1 {
			return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine}
		}
		if len(r.TimeFormat) > 0 {
			r.Date, err = time.Parse(r.TimeFormat, unsafeString(bytes.Join(fields[1:], []byte{' '})))
			if err != nil {
				return nil, err
			}
		}
		return r.Read()
	case "Type", "type":
		if len(fields) <= 1 {
			return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine}
		}
		r.Type = feat.ParseMoltype(unsafeString(fields[1]))
		if len(fields) > 2 {
			r.Name = string(fields[2])
		}
		return r.Read()
	case "sequence-region":
		if len(fields) <= 3 {
			return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine}
		}
		return &Region{
			Sequence:    Sequence{SeqName: string(fields[1]), Type: r.Type},
			RegionStart: feat.OneToZero(mustAtoi(fields, 2, r.line)),
			RegionEnd:   mustAtoi(fields, 3, r.line),
		}, nil
	case "DNA", "RNA", "Protein", "dna", "rna", "protein":
		if len(fields) <= 1 {
			return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine}
		}
		return r.metaSeq(fields[0], fields[1])
	default:
		return nil, &csv.ParseError{Line: r.line, Err: ErrNotHandled}
	}
}
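
A short usage sketch for the metadata handling above (assuming the exported gff.NewReader, Region, and Reader metadata fields shown in these examples; the sequence name and coordinates are invented):

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/biogo/biogo/io/featio/gff"
)

func main() {
	const header = "##gff-version 2\n" +
		"##sequence-region chrI 1 230218\n"

	r := gff.NewReader(strings.NewReader(header))

	// The gff-version line is consumed as a side effect of Read; the
	// sequence-region line comes back as the first returned feature.
	f, err := r.Read()
	if err != nil {
		log.Fatalf("failed to read metadata: %v", err)
	}
	reg := f.(*gff.Region)
	fmt.Println(r.Version, reg.SeqName, reg.RegionStart, reg.RegionEnd)
}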
Example #2
// Read reads a single feature or part and returns it or an error. A call to Read may
// have side effects on the Reader's Metadata field.
func (r *Reader) Read() (f feat.Feature, err error) {
	defer handlePanic(f, &err)

	var line []byte
	for {
		line, err = r.r.ReadBytes('\n')
		if err != nil {
			if err == io.EOF {
				return f, err
			}
			return nil, &csv.ParseError{Line: r.line, Err: err}
		}
		r.line++
		line = bytes.TrimSpace(line)
		if len(line) == 0 { // ignore blank lines
			continue
		} else if bytes.HasPrefix(line, []byte("##")) {
			f, err = r.commentMetaline(line[2:])
			return
		} else if line[0] != '#' { // ignore comments
			break
		}
	}

	fields := bytes.SplitN(line, []byte{'\t'}, lastField)
	if len(fields) < frameField {
		return nil, &csv.ParseError{Line: r.line, Column: len(fields), Err: ErrFieldMissing}
	}

	gff := &Feature{
		SeqName:    string(fields[nameField]),
		Source:     string(fields[sourceField]),
		Feature:    string(fields[featureField]),
		FeatStart:  feat.OneToZero(mustAtoi(fields, startField, r.line)),
		FeatEnd:    mustAtoi(fields, endField, r.line),
		FeatScore:  mustAtofPtr(fields, scoreField, r.line),
		FeatStrand: mustAtos(fields, strandField, r.line),
		FeatFrame:  mustAtoFr(fields, frameField, r.line),
	}

	if len(fields) <= attributeField {
		return gff, nil
	}
	gff.FeatAttributes = mustAtoa(fields, attributeField, r.line)
	if len(fields) <= commentField {
		return gff, nil
	}
	gff.Comments = string(fields[commentField])

	if gff.FeatStart >= gff.FeatEnd {
		return nil, &csv.ParseError{Line: r.line, Err: ErrBadFeature}
	}
	return gff, nil
}
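
A plausible way to drive Read directly, rather than through the featio.Scanner used in Example #3 (a minimal sketch assuming gff.NewReader as in that example; the loop ends on io.EOF):

package main

import (
	"fmt"
	"io"
	"log"
	"os"

	"github.com/biogo/biogo/io/featio/gff"
)

func main() {
	r := gff.NewReader(os.Stdin)
	for {
		f, err := r.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatalf("error during GFF read: %v", err)
		}
		// Metadata lines may update the Reader's fields as a side effect and
		// can themselves yield features such as sequence regions.
		fmt.Println(f)
	}
}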
Example #3
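// This program reads reefer GFF hits on standard input and one or more SAM
// read files named on the command line, extracting the read region recorded
// in each hit's Read attribute and writing it to standard output.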
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "invalid invocation: must have at least one reads file")
		os.Exit(1)
	}

	extract := make(map[string][2]int)
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		read := f.FeatAttributes.Get("Read")
		if read == "" {
			continue
		}
		fields := strings.Fields(read)
		if len(fields) < 3 {
			log.Fatalf("unexpected Read attribute %q", read)
		}
		name := fields[0]
		start, err := strconv.Atoi(fields[1])
		if err != nil {
			log.Fatalf("failed to parse %q: %v", read, err)
		}
		end, err := strconv.Atoi(fields[2])
		if err != nil {
			log.Fatalf("failed to parse %q: %v", read, err)
		}
		extract[name] = [2]int{start, end}
	}
	err := sc.Error()
	if err != nil {
		log.Fatalf("error during GFF read: %v", err)
	}

	for _, reads := range os.Args[1:] {
		sf, err := os.Open(reads)
		if err != nil {
			log.Fatalf("failed to open %q: %v", reads, err)
		}
		sr, err := sam.NewReader(sf)
		if err != nil {
			log.Fatalf("failed to open SAM input %q: %v", reads, err)
		}
		for {
			r, err := sr.Read()
			if err != nil {
				if err != io.EOF {
					log.Fatalf("unexpected error reading SAM: %v", err)
				}
				break
			}

			v, ok := extract[r.Name]
			if !ok {
				continue
			}
			// Currently reefer only expects a single hit per read,
			// so any multiples are due to duplicate read file input.
			// Update this behaviour if we change reefer to look at
			// remapping soft-clipped segments.
			delete(extract, r.Name)

			reverse := r.Flags&sam.Reverse != 0
			rng := fmt.Sprintf("//%d_%d", v[0], v[1])
			if reverse {
				rng += "(-)"
				length := r.Seq.Length
				v[0], v[1] = length-v[1], length-v[0]
			}
			v[0] = feat.OneToZero(v[0])
			s := linear.NewSeq(
				r.Name+rng,
				alphabet.BytesToLetters(r.Seq.Expand())[v[0]:v[1]],
				alphabet.DNA,
			)
			if reverse {
				s.Desc = "(sequence revcomp relative to read)"
			}
			fmt.Printf("%60a\n", s)
		}
		sf.Close()
	}
}