// ReadBamFile reads bam file, and return the header and a channel of sam records. func readBamFile(fileName string) (h *sam.Header, c chan sam.Record) { // Initialize the channel of sam records. c = make(chan sam.Record) // Create a new go routine to read the records. go func() { // Close the record channel when finished. defer close(c) // Open file stream, and close it when finished. f, err := os.Open(fileName) if err != nil { log.Fatalln(err) } defer f.Close() type SamReader interface { Header() *sam.Header Read() (*sam.Record, error) } var reader SamReader if fileName[len(fileName)-3:] == "bam" { bamReader, err := bam.NewReader(f, 0) if err != nil { log.Fatalln(err) } defer bamReader.Close() reader = bamReader } else { reader, err = sam.NewReader(f) if err != nil { log.Fatalln(err) } } // Read and assign header. h = reader.Header() // Read sam records and send them to the channel, // until it hit an error, which raises a panic // if it is not a IO EOF. for { rec, err := reader.Read() if err != nil { if err != io.EOF { log.Fatalln(err) } break } c <- *rec } }() return }
func main() { if len(os.Args) < 2 { fmt.Fprintln(os.Stderr, "invalid invocation: must have at least one reads file") os.Exit(1) } extract := make(map[string][2]int) sc := featio.NewScanner(gff.NewReader(os.Stdin)) for sc.Next() { f := sc.Feat().(*gff.Feature) read := f.FeatAttributes.Get("Read") if read == "" { continue } fields := strings.Fields(read) name := fields[0] start, err := strconv.Atoi(fields[1]) if err != nil { log.Fatalf("failed to parse %q: %v", read, err) } end, err := strconv.Atoi(fields[2]) if err != nil { log.Fatalf("failed to parse %q: %v", read, err) } extract[name] = [2]int{start, end} } err := sc.Error() if err != nil { log.Fatalf("error during GFF read: %v", err) } for _, reads := range os.Args[1:] { sf, err := os.Open(reads) if err != nil { log.Fatalf("failed to open %q: %v", reads, err) } sr, err := sam.NewReader(sf) if err != nil { log.Fatalf("failed to open SAM input %q: %v", reads, err) } for { r, err := sr.Read() if err != nil { if err != io.EOF { log.Fatalf("unexpected error reading SAM: %v", err) } break } v, ok := extract[r.Name] if !ok { continue } // Currently reefer only expects a single hit per read, // so any multiples are due to duplicate read file input. // Update this behaviour if we change reefer to look at // remapping soft-clipped segments. delete(extract, r.Name) reverse := r.Flags&sam.Reverse != 0 rng := fmt.Sprintf("//%d_%d", v[0], v[1]) if reverse { rng += "(-)" len := r.Seq.Length v[0], v[1] = len-v[1], len-v[0] } v[0] = feat.OneToZero(v[0]) s := linear.NewSeq( r.Name+rng, alphabet.BytesToLetters(r.Seq.Expand())[v[0]:v[1]], alphabet.DNA, ) if reverse { s.Desc = "(sequence revcomp relative to read)" } fmt.Printf("%60a\n", s) } sf.Close() } }
// deletions analyses *sam.Records from mapping reads to the given reference // using the suffix array file if provided. If run is false, blasr is not // run and the existing blasr output is used to provide the *sam.Records. // procs specifies the number of blasr threads to use. func deletions(reads, ref, suff, ext string, procs int, run bool, window, min int, br *refiner, w *gff.Writer) error { base := filepath.Base(reads) b := blasr.BLASR{ Cmd: *blasrPath, Reads: reads, Genome: ref, SuffixArray: suff, BestN: 1, SAM: true, Clipping: "soft", SAMQV: true, CIGARSeqMatch: true, Aligned: base + ".blasr." + ext, Unaligned: base + ".blasr.unmapped.fasta", Procs: procs, } if run { cmd, err := b.BuildCommand() if err != nil { return err } cmd.Stdout = errStream cmd.Stderr = errStream err = cmd.Run() if err != nil { return err } } f, err := os.Open(b.Aligned) if err != nil { return err } defer f.Close() cost := [...]float64{ sam.CigarInsertion: -2, sam.CigarDeletion: -2, sam.CigarEqual: 1, sam.CigarMismatch: -1, // Included for explicitness sam.CigarSoftClipped: 0, // Included to ensure no bounds panic. // All CIGAR operations not listed above // are given a zero cost. sam.CigarBack: 0, } _, err = w.WriteComment(fmt.Sprintf("smoothing window=%d", window)) if err != nil { return nil } _, err = w.WriteComment(fmt.Sprintf("minimum feature length=%d", min)) if err != nil { return nil } gf := &gff.Feature{ Source: "reefer", Feature: "discordance", FeatFrame: gff.NoFrame, FeatAttributes: gff.Attributes{{Tag: "Read"}, {Tag: "Dup"}}, } var sr interface { Read() (*sam.Record, error) } switch ext { case "sam": sr, err = sam.NewReader(f) if err != nil { return err } case "bam": var br *bam.Reader br, err = bam.NewReader(f, 0) if err != nil { return err } defer br.Close() sr = br default: panic("reefer: invalid extension") } for { r, err := sr.Read() if err != nil { if err != io.EOF { return err } break } var ( scores []costPos ref = r.Start() query int ) for _, co := range r.Cigar { for i := 0; i < co.Len(); i++ { scores = append(scores, costPos{ ref: ref, query: query, cost: cost[co.Type()], }) consume := co.Type().Consumes() ref += consume.Reference query += consume.Query } } if len(scores) <= window { continue } smoothed := make([]costPos, len(scores)-window) for i := range scores[:len(scores)-window] { smoothed[i] = mean(scores[i : i+window]) } var d deletion for i, v := range smoothed[1:] { switch { case d.record == nil && v.cost < 0 && smoothed[i].cost >= 0: d = deletion{record: r, rstart: v.ref + 1, qstart: v.query + 1} case d.record != nil && v.cost >= 0 && smoothed[i].cost < 0: d.rend = v.ref d.qend = v.query if d.rend-d.rstart >= min || d.qend-d.qstart >= min { gf.SeqName = d.record.Ref.Name() gf.FeatStrand = strandFor(d.record) if gf.FeatStrand == seq.Minus { len := d.record.Seq.Length d.qstart, d.qend = len-d.qend, len-d.qstart } // Adjust ends based on paired SW alignments. var refined bool d, refined, err = br.adjust(d) if err != nil && *verbose { log.Printf("failed alignment %s: %v", d.record.Name, err) } gf.FeatStart = d.rstart gf.FeatEnd = d.rend if gf.FeatStart == gf.FeatEnd { // This is disgusting garbage resulting from // GFF not allowing zero length features. gf.FeatEnd++ } if refined { gf.FeatAttributes = gf.FeatAttributes[:2] gf.FeatAttributes[1].Value = strconv.Itoa(d.dup) } else { gf.FeatAttributes = gf.FeatAttributes[:1] } gf.FeatAttributes[0].Value = fmt.Sprintf("%s %d %d", d.record.Name, feat.ZeroToOne(d.qstart), d.qend) _, err = w.Write(gf) if err != nil { return err } } d.record = nil } } } return nil }