func ExampleIndex_Add() { // Create a BAI for the BAM read from standard in and write it to standard out. br, err := bam.NewReader(os.Stdin, 1) if err != nil { log.Fatalf("failed to open BAM: %v", err) } var bai bam.Index for { r, err := br.Read() if err == io.EOF { break } if err != nil { log.Fatalf("failed to read BAM record: %v", err) } err = bai.Add(r, br.LastChunk()) if err != nil { log.Fatalf("failed to add record to BAM index: %v", err) } } err = bam.WriteIndex(os.Stdout, &bai) if err != nil { log.Fatalf("failed to write BAM index: %v", err) } }
func BamToRelatable(f io.Reader) (interfaces.RelatableChannel, error) { ch := make(chan interfaces.Relatable, 64) b, err := bam.NewReader(f, 0) if err != nil { return nil, err } go func() { for { rec, err := b.Read() if err != nil { if err == io.EOF { break } else { log.Println(err) break } } if rec.RefID() == -1 { // unmapped continue } // TODO: see if keeping the list of chrom names and using a ref is better. bam := Bam{Record: rec, Chromosome: rec.Ref.Name(), related: nil} ch <- &bam } close(ch) b.Close() f.(io.ReadCloser).Close() }() return ch, nil }
func (c *cmdFilter) readBamHeader() *sam.Header { f := openFile(c.bamFile) defer f.Close() rd, err := bam.NewReader(f, 0) raiseError(err) defer rd.Close() header := rd.Header() return header }
// ReadBamFile reads bam file, and return the header and a channel of sam records. func readBamFile(fileName string) (h *sam.Header, c chan sam.Record) { // Initialize the channel of sam records. c = make(chan sam.Record) // Create a new go routine to read the records. go func() { // Close the record channel when finished. defer close(c) // Open file stream, and close it when finished. f, err := os.Open(fileName) if err != nil { log.Fatalln(err) } defer f.Close() type SamReader interface { Header() *sam.Header Read() (*sam.Record, error) } var reader SamReader if fileName[len(fileName)-3:] == "bam" { bamReader, err := bam.NewReader(f, 0) if err != nil { log.Fatalln(err) } defer bamReader.Close() reader = bamReader } else { reader, err = sam.NewReader(f) if err != nil { log.Fatalln(err) } } // Read and assign header. h = reader.Header() // Read sam records and send them to the channel, // until it hit an error, which raises a panic // if it is not a IO EOF. for { rec, err := reader.Read() if err != nil { if err != io.EOF { log.Fatalln(err) } break } c <- *rec } }() return }
func (c *cmdFilter) readBamFile() (ch chan *sam.Record) { ch = make(chan *sam.Record) go func() { defer close(ch) f := openFile(c.bamFile) defer f.Close() rd, err := bam.NewReader(f, 0) raiseError(err) defer rd.Close() for { record, err := rd.Read() if err != nil { if err != io.EOF { raiseError(err) } break } ch <- record } }() return }
// deletions analyses *sam.Records from mapping reads to the given reference // using the suffix array file if provided. If run is false, blasr is not // run and the existing blasr output is used to provide the *sam.Records. // procs specifies the number of blasr threads to use. func deletions(reads, ref, suff, ext string, procs int, run bool, window, min int, br *refiner, w *gff.Writer) error { base := filepath.Base(reads) b := blasr.BLASR{ Cmd: *blasrPath, Reads: reads, Genome: ref, SuffixArray: suff, BestN: 1, SAM: true, Clipping: "soft", SAMQV: true, CIGARSeqMatch: true, Aligned: base + ".blasr." + ext, Unaligned: base + ".blasr.unmapped.fasta", Procs: procs, } if run { cmd, err := b.BuildCommand() if err != nil { return err } cmd.Stdout = errStream cmd.Stderr = errStream err = cmd.Run() if err != nil { return err } } f, err := os.Open(b.Aligned) if err != nil { return err } defer f.Close() cost := [...]float64{ sam.CigarInsertion: -2, sam.CigarDeletion: -2, sam.CigarEqual: 1, sam.CigarMismatch: -1, // Included for explicitness sam.CigarSoftClipped: 0, // Included to ensure no bounds panic. // All CIGAR operations not listed above // are given a zero cost. sam.CigarBack: 0, } _, err = w.WriteComment(fmt.Sprintf("smoothing window=%d", window)) if err != nil { return nil } _, err = w.WriteComment(fmt.Sprintf("minimum feature length=%d", min)) if err != nil { return nil } gf := &gff.Feature{ Source: "reefer", Feature: "discordance", FeatFrame: gff.NoFrame, FeatAttributes: gff.Attributes{{Tag: "Read"}, {Tag: "Dup"}}, } var sr interface { Read() (*sam.Record, error) } switch ext { case "sam": sr, err = sam.NewReader(f) if err != nil { return err } case "bam": var br *bam.Reader br, err = bam.NewReader(f, 0) if err != nil { return err } defer br.Close() sr = br default: panic("reefer: invalid extension") } for { r, err := sr.Read() if err != nil { if err != io.EOF { return err } break } var ( scores []costPos ref = r.Start() query int ) for _, co := range r.Cigar { for i := 0; i < co.Len(); i++ { scores = append(scores, costPos{ ref: ref, query: query, cost: cost[co.Type()], }) consume := co.Type().Consumes() ref += consume.Reference query += consume.Query } } if len(scores) <= window { continue } smoothed := make([]costPos, len(scores)-window) for i := range scores[:len(scores)-window] { smoothed[i] = mean(scores[i : i+window]) } var d deletion for i, v := range smoothed[1:] { switch { case d.record == nil && v.cost < 0 && smoothed[i].cost >= 0: d = deletion{record: r, rstart: v.ref + 1, qstart: v.query + 1} case d.record != nil && v.cost >= 0 && smoothed[i].cost < 0: d.rend = v.ref d.qend = v.query if d.rend-d.rstart >= min || d.qend-d.qstart >= min { gf.SeqName = d.record.Ref.Name() gf.FeatStrand = strandFor(d.record) if gf.FeatStrand == seq.Minus { len := d.record.Seq.Length d.qstart, d.qend = len-d.qend, len-d.qstart } // Adjust ends based on paired SW alignments. var refined bool d, refined, err = br.adjust(d) if err != nil && *verbose { log.Printf("failed alignment %s: %v", d.record.Name, err) } gf.FeatStart = d.rstart gf.FeatEnd = d.rend if gf.FeatStart == gf.FeatEnd { // This is disgusting garbage resulting from // GFF not allowing zero length features. gf.FeatEnd++ } if refined { gf.FeatAttributes = gf.FeatAttributes[:2] gf.FeatAttributes[1].Value = strconv.Itoa(d.dup) } else { gf.FeatAttributes = gf.FeatAttributes[:1] } gf.FeatAttributes[0].Value = fmt.Sprintf("%s %d %d", d.record.Name, feat.ZeroToOne(d.qstart), d.qend) _, err = w.Write(gf) if err != nil { return err } } d.record = nil } } } return nil }