func (r *Reader) commentMetaline(line []byte) (f feat.Feature, err error) { fields := bytes.Split(line, []byte{' '}) if len(fields) < 1 { return nil, &csv.ParseError{Line: r.line, Err: ErrEmptyMetaLine} } switch unsafeString(fields[0]) { case "gff-version": v := mustAtoi(fields, 1, r.line) if v > Version { return nil, &csv.ParseError{Line: r.line, Err: ErrNotHandled} } r.Version = Version return r.Read() case "source-version": if len(fields) <= 1 { return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine} } r.SourceVersion = string(bytes.Join(fields[1:], []byte{' '})) return r.Read() case "date": if len(fields) <= 1 { return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine} } if len(r.TimeFormat) > 0 { r.Date, err = time.Parse(r.TimeFormat, unsafeString(bytes.Join(fields[1:], []byte{' '}))) if err != nil { return nil, err } } return r.Read() case "Type", "type": if len(fields) <= 1 { return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine} } r.Type = feat.ParseMoltype(unsafeString(fields[1])) if len(fields) > 2 { r.Name = string(fields[2]) } return r.Read() case "sequence-region": if len(fields) <= 3 { return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine} } return &Region{ Sequence: Sequence{SeqName: string(fields[1]), Type: r.Type}, RegionStart: feat.OneToZero(mustAtoi(fields, 2, r.line)), RegionEnd: mustAtoi(fields, 3, r.line), }, nil case "DNA", "RNA", "Protein", "dna", "rna", "protein": if len(fields) <= 1 { return nil, &csv.ParseError{Line: r.line, Err: ErrBadMetaLine} } return r.metaSeq(fields[0], fields[1]) default: return nil, &csv.ParseError{Line: r.line, Err: ErrNotHandled} } }
// Read reads a single feature or part and return it or an error. A call to read may
// have side effects on the Reader's Metadata field.
func (r *Reader) Read() (f feat.Feature, err error) {
	// handlePanic converts panics raised by the must* conversion helpers
	// below into the returned err.
	defer handlePanic(f, &err)

	var line []byte
	for {
		line, err = r.r.ReadBytes('\n')
		if err != nil {
			if err == io.EOF {
				// Plain EOF is propagated as-is so callers can detect end of input.
				return f, err
			}
			return nil, &csv.ParseError{Line: r.line, Err: err}
		}
		r.line++
		line = bytes.TrimSpace(line)
		if len(line) == 0 { // ignore blank lines
			continue
		} else if bytes.HasPrefix(line, []byte("##")) {
			// "##" marks a meta line; it may update Reader metadata and/or
			// yield a feature of its own.
			f, err = r.commentMetaline(line[2:])
			return
		} else if line[0] != '#' { // ignore comments
			break
		}
	}

	// A data line: split into at most lastField tab-separated columns and
	// require everything up to and including the frame column.
	fields := bytes.SplitN(line, []byte{'\t'}, lastField)
	if len(fields) < frameField {
		return nil, &csv.ParseError{Line: r.line, Column: len(fields), Err: ErrFieldMissing}
	}
	gff := &Feature{
		SeqName: string(fields[nameField]),
		Source:  string(fields[sourceField]),
		Feature: string(fields[featureField]),
		// GFF coordinates are one-based inclusive; convert the start to
		// the zero-based convention used internally.
		FeatStart:  feat.OneToZero(mustAtoi(fields, startField, r.line)),
		FeatEnd:    mustAtoi(fields, endField, r.line),
		FeatScore:  mustAtofPtr(fields, scoreField, r.line),
		FeatStrand: mustAtos(fields, strandField, r.line),
		FeatFrame:  mustAtoFr(fields, frameField, r.line),
	}
	// The attribute and comment columns are optional.
	if len(fields) <= attributeField {
		return gff, nil
	}
	gff.FeatAttributes = mustAtoa(fields, attributeField, r.line)
	if len(fields) <= commentField {
		return gff, nil
	}
	gff.Comments = string(fields[commentField])
	// NOTE: on an inverted/empty interval the feature is still returned,
	// paired with ErrBadFeature, so callers may inspect the bad record.
	if gff.FeatStart >= gff.FeatEnd {
		err = ErrBadFeature
	}
	return gff, nil
}
func main() { if len(os.Args) < 2 { fmt.Fprintln(os.Stderr, "invalid invocation: must have at least one reads file") os.Exit(1) } extract := make(map[string][2]int) sc := featio.NewScanner(gff.NewReader(os.Stdin)) for sc.Next() { f := sc.Feat().(*gff.Feature) read := f.FeatAttributes.Get("Read") if read == "" { continue } fields := strings.Fields(read) name := fields[0] start, err := strconv.Atoi(fields[1]) if err != nil { log.Fatalf("failed to parse %q: %v", read, err) } end, err := strconv.Atoi(fields[2]) if err != nil { log.Fatalf("failed to parse %q: %v", read, err) } extract[name] = [2]int{start, end} } err := sc.Error() if err != nil { log.Fatalf("error during GFF read: %v", err) } for _, reads := range os.Args[1:] { sf, err := os.Open(reads) if err != nil { log.Fatalf("failed to open %q: %v", reads, err) } sr, err := sam.NewReader(sf) if err != nil { log.Fatalf("failed to open SAM input %q: %v", reads, err) } for { r, err := sr.Read() if err != nil { if err != io.EOF { log.Fatalf("unexpected error reading SAM: %v", err) } break } v, ok := extract[r.Name] if !ok { continue } // Currently reefer only expects a single hit per read, // so any multiples are due to duplicate read file input. // Update this behaviour if we change reefer to look at // remapping soft-clipped segments. delete(extract, r.Name) reverse := r.Flags&sam.Reverse != 0 rng := fmt.Sprintf("//%d_%d", v[0], v[1]) if reverse { rng += "(-)" len := r.Seq.Length v[0], v[1] = len-v[1], len-v[0] } v[0] = feat.OneToZero(v[0]) s := linear.NewSeq( r.Name+rng, alphabet.BytesToLetters(r.Seq.Expand())[v[0]:v[1]], alphabet.DNA, ) if reverse { s.Desc = "(sequence revcomp relative to read)" } fmt.Printf("%60a\n", s) } sf.Close() } }