// writeResults writes out the results of the analysis in a format similar to the // Pacific Biosciences bridgemapper program (29 tab separated fields). It also writes // candidate discordances to the discords gff.Writer if it is not nil. Flanks less than // flank long are not considered and primay mappings less than length long are omitted. func writeResults(core, left, right hitSet, out io.Writer, length, flank int, discords *gff.Writer) error { for id, c := range core { if c.qEnd-c.qStart < length { continue } l, ok := left[id] if ok && abs(l.tEnd-l.tStart) < flank { l = nil } r, ok := right[id] if ok && abs(r.tEnd-r.tStart) < flank { r = nil } if l == nil && r == nil { continue } _, err := fmt.Fprintf(out, "%s\t%d\t%v\t%v\t%v\n", id, c.qLen, l, c, r) if err != nil { return err } if discords != nil { for _, f := range [2]*blasrHit{l, r} { if f == nil { continue } if f.tName != c.tName { _, err = discords.Write(&gff.Feature{ SeqName: f.tName, Feature: "flank", Source: "loopy", FeatStart: f.tStart, FeatEnd: f.tEnd, FeatScore: floatPtr(float64(f.score)), FeatStrand: f.qStrand, FeatFrame: gff.NoFrame, }) if err != nil { return err } } else if f.tStrand == c.tStrand { for _, g := range gapOrOverlap(f, c, flank) { _, err = discords.Write(g) if err != nil { return err } } } } } } return nil }
func main() { flag.Parse() if *exclude == "" { flag.Usage() os.Exit(1) } nameSet := make(map[string]struct{}) f, err := os.Open(*exclude) if err != nil { log.Fatalf("failed to open exclude file %q: %v", *exclude, err) } ls := bufio.NewScanner(f) for ls.Scan() { nameSet[ls.Text()] = struct{}{} } err = ls.Err() if err != nil { log.Fatalf("failed to read exclude file: %v", err) } w := gff.NewWriter(os.Stdout, 60, true) var excl *gff.Writer if *retain { excl = gff.NewWriter(os.Stderr, 60, true) } sc := featio.NewScanner(gff.NewReader(os.Stdin)) for sc.Next() { f := sc.Feat().(*gff.Feature) n := f.FeatAttributes.Get("Read") if _, ok := nameSet[n]; ok { if excl != nil { _, err := excl.Write(f) if err != nil { log.Fatalf("failed to write feature: %v", err) } } continue } _, err := w.Write(f) if err != nil { log.Fatalf("failed to write feature: %v", err) } } if err := sc.Error(); err != nil { log.Fatalf("error during gff read: %v", err) } }
// deletions analyses *sam.Records from mapping reads to the given reference // using the suffix array file if provided. If run is false, blasr is not // run and the existing blasr output is used to provide the *sam.Records. // procs specifies the number of blasr threads to use. func deletions(reads, ref, suff, ext string, procs int, run bool, window, min int, br *refiner, w *gff.Writer) error { base := filepath.Base(reads) b := blasr.BLASR{ Cmd: *blasrPath, Reads: reads, Genome: ref, SuffixArray: suff, BestN: 1, SAM: true, Clipping: "soft", SAMQV: true, CIGARSeqMatch: true, Aligned: base + ".blasr." + ext, Unaligned: base + ".blasr.unmapped.fasta", Procs: procs, } if run { cmd, err := b.BuildCommand() if err != nil { return err } cmd.Stdout = errStream cmd.Stderr = errStream err = cmd.Run() if err != nil { return err } } f, err := os.Open(b.Aligned) if err != nil { return err } defer f.Close() cost := [...]float64{ sam.CigarInsertion: -2, sam.CigarDeletion: -2, sam.CigarEqual: 1, sam.CigarMismatch: -1, // Included for explicitness sam.CigarSoftClipped: 0, // Included to ensure no bounds panic. // All CIGAR operations not listed above // are given a zero cost. sam.CigarBack: 0, } _, err = w.WriteComment(fmt.Sprintf("smoothing window=%d", window)) if err != nil { return nil } _, err = w.WriteComment(fmt.Sprintf("minimum feature length=%d", min)) if err != nil { return nil } gf := &gff.Feature{ Source: "reefer", Feature: "discordance", FeatFrame: gff.NoFrame, FeatAttributes: gff.Attributes{{Tag: "Read"}, {Tag: "Dup"}}, } var sr interface { Read() (*sam.Record, error) } switch ext { case "sam": sr, err = sam.NewReader(f) if err != nil { return err } case "bam": var br *bam.Reader br, err = bam.NewReader(f, 0) if err != nil { return err } defer br.Close() sr = br default: panic("reefer: invalid extension") } for { r, err := sr.Read() if err != nil { if err != io.EOF { return err } break } var ( scores []costPos ref = r.Start() query int ) for _, co := range r.Cigar { for i := 0; i < co.Len(); i++ { scores = append(scores, costPos{ ref: ref, query: query, cost: cost[co.Type()], }) consume := co.Type().Consumes() ref += consume.Reference query += consume.Query } } if len(scores) <= window { continue } smoothed := make([]costPos, len(scores)-window) for i := range scores[:len(scores)-window] { smoothed[i] = mean(scores[i : i+window]) } var d deletion for i, v := range smoothed[1:] { switch { case d.record == nil && v.cost < 0 && smoothed[i].cost >= 0: d = deletion{record: r, rstart: v.ref + 1, qstart: v.query + 1} case d.record != nil && v.cost >= 0 && smoothed[i].cost < 0: d.rend = v.ref d.qend = v.query if d.rend-d.rstart >= min || d.qend-d.qstart >= min { gf.SeqName = d.record.Ref.Name() gf.FeatStrand = strandFor(d.record) if gf.FeatStrand == seq.Minus { len := d.record.Seq.Length d.qstart, d.qend = len-d.qend, len-d.qstart } // Adjust ends based on paired SW alignments. var refined bool d, refined, err = br.adjust(d) if err != nil && *verbose { log.Printf("failed alignment %s: %v", d.record.Name, err) } gf.FeatStart = d.rstart gf.FeatEnd = d.rend if gf.FeatStart == gf.FeatEnd { // This is disgusting garbage resulting from // GFF not allowing zero length features. gf.FeatEnd++ } if refined { gf.FeatAttributes = gf.FeatAttributes[:2] gf.FeatAttributes[1].Value = strconv.Itoa(d.dup) } else { gf.FeatAttributes = gf.FeatAttributes[:1] } gf.FeatAttributes[0].Value = fmt.Sprintf("%s %d %d", d.record.Name, feat.ZeroToOne(d.qstart), d.qend) _, err = w.Write(gf) if err != nil { return err } } d.record = nil } } } return nil }