Exemple #1
0
func ExampleIndex_Add() {
	// Create a BAI for the BAM read from standard in and write it to standard out.
	br, err := bam.NewReader(os.Stdin, 1)
	if err != nil {
		log.Fatalf("failed to open BAM: %v", err)
	}

	var bai bam.Index
	for {
		r, err := br.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("failed to read BAM record: %v", err)
		}
		err = bai.Add(r, br.LastChunk())
		if err != nil {
			log.Fatalf("failed to add record to BAM index: %v", err)
		}
	}

	err = bam.WriteIndex(os.Stdout, &bai)
	if err != nil {
		log.Fatalf("failed to write BAM index: %v", err)
	}
}
Exemple #2
0
func BamToRelatable(f io.Reader) (interfaces.RelatableChannel, error) {

	ch := make(chan interfaces.Relatable, 64)
	b, err := bam.NewReader(f, 0)
	if err != nil {
		return nil, err
	}

	go func() {
		for {
			rec, err := b.Read()
			if err != nil {
				if err == io.EOF {
					break
				} else {
					log.Println(err)
					break
				}
			}
			if rec.RefID() == -1 { // unmapped
				continue
			}
			// TODO: see if keeping the list of chrom names and using a ref is better.
			bam := Bam{Record: rec, Chromosome: rec.Ref.Name(), related: nil}
			ch <- &bam
		}
		close(ch)
		b.Close()
		f.(io.ReadCloser).Close()
	}()
	return ch, nil
}
Exemple #3
0
func (c *cmdFilter) readBamHeader() *sam.Header {
	f := openFile(c.bamFile)
	defer f.Close()
	rd, err := bam.NewReader(f, 0)
	raiseError(err)
	defer rd.Close()
	header := rd.Header()
	return header
}
Exemple #4
0
// ReadBamFile reads bam file, and return the header and a channel of sam records.
func readBamFile(fileName string) (h *sam.Header, c chan sam.Record) {
	// Initialize the channel of sam records.
	c = make(chan sam.Record)

	// Create a new go routine to read the records.
	go func() {
		// Close the record channel when finished.
		defer close(c)

		// Open file stream, and close it when finished.
		f, err := os.Open(fileName)
		if err != nil {
			log.Fatalln(err)
		}
		defer f.Close()

		type SamReader interface {
			Header() *sam.Header
			Read() (*sam.Record, error)
		}

		var reader SamReader
		if fileName[len(fileName)-3:] == "bam" {
			bamReader, err := bam.NewReader(f, 0)
			if err != nil {
				log.Fatalln(err)
			}
			defer bamReader.Close()
			reader = bamReader
		} else {
			reader, err = sam.NewReader(f)
			if err != nil {
				log.Fatalln(err)
			}
		}

		// Read and assign header.
		h = reader.Header()

		// Read sam records and send them to the channel,
		// until it hit an error, which raises a panic
		// if it is not a IO EOF.
		for {
			rec, err := reader.Read()
			if err != nil {
				if err != io.EOF {
					log.Fatalln(err)
				}
				break
			}
			c <- *rec
		}
	}()

	return
}
Exemple #5
0
func (c *cmdFilter) readBamFile() (ch chan *sam.Record) {
	ch = make(chan *sam.Record)

	go func() {
		defer close(ch)
		f := openFile(c.bamFile)
		defer f.Close()
		rd, err := bam.NewReader(f, 0)
		raiseError(err)
		defer rd.Close()
		for {
			record, err := rd.Read()
			if err != nil {
				if err != io.EOF {
					raiseError(err)
				}
				break
			}
			ch <- record
		}
	}()

	return
}
Exemple #6
0
// deletions analyses *sam.Records from mapping reads to the given reference
// using the suffix array file if provided. If run is false, blasr is not
// run and the existing blasr output is used to provide the *sam.Records.
// procs specifies the number of blasr threads to use.
func deletions(reads, ref, suff, ext string, procs int, run bool, window, min int, br *refiner, w *gff.Writer) error {
	base := filepath.Base(reads)
	b := blasr.BLASR{
		Cmd: *blasrPath,

		Reads: reads, Genome: ref, SuffixArray: suff,
		BestN: 1,

		SAM:           true,
		Clipping:      "soft",
		SAMQV:         true,
		CIGARSeqMatch: true,

		Aligned:   base + ".blasr." + ext,
		Unaligned: base + ".blasr.unmapped.fasta",

		Procs: procs,
	}
	if run {
		cmd, err := b.BuildCommand()
		if err != nil {
			return err
		}
		cmd.Stdout = errStream
		cmd.Stderr = errStream
		err = cmd.Run()
		if err != nil {
			return err
		}
	}

	f, err := os.Open(b.Aligned)
	if err != nil {
		return err
	}
	defer f.Close()

	cost := [...]float64{
		sam.CigarInsertion: -2,
		sam.CigarDeletion:  -2,
		sam.CigarEqual:     1,
		sam.CigarMismatch:  -1,

		// Included for explicitness
		sam.CigarSoftClipped: 0,

		// Included to ensure no bounds panic.
		// All CIGAR operations not listed above
		// are given a zero cost.
		sam.CigarBack: 0,
	}

	_, err = w.WriteComment(fmt.Sprintf("smoothing window=%d", window))
	if err != nil {
		return nil
	}
	_, err = w.WriteComment(fmt.Sprintf("minimum feature length=%d", min))
	if err != nil {
		return nil
	}
	gf := &gff.Feature{
		Source:         "reefer",
		Feature:        "discordance",
		FeatFrame:      gff.NoFrame,
		FeatAttributes: gff.Attributes{{Tag: "Read"}, {Tag: "Dup"}},
	}
	var sr interface {
		Read() (*sam.Record, error)
	}
	switch ext {
	case "sam":
		sr, err = sam.NewReader(f)
		if err != nil {
			return err
		}
	case "bam":
		var br *bam.Reader
		br, err = bam.NewReader(f, 0)
		if err != nil {
			return err
		}
		defer br.Close()
		sr = br
	default:
		panic("reefer: invalid extension")
	}
	for {
		r, err := sr.Read()
		if err != nil {
			if err != io.EOF {
				return err
			}
			break
		}

		var (
			scores []costPos
			ref    = r.Start()
			query  int
		)
		for _, co := range r.Cigar {
			for i := 0; i < co.Len(); i++ {
				scores = append(scores, costPos{
					ref:   ref,
					query: query,
					cost:  cost[co.Type()],
				})
				consume := co.Type().Consumes()
				ref += consume.Reference
				query += consume.Query
			}
		}
		if len(scores) <= window {
			continue
		}
		smoothed := make([]costPos, len(scores)-window)
		for i := range scores[:len(scores)-window] {
			smoothed[i] = mean(scores[i : i+window])
		}

		var d deletion
		for i, v := range smoothed[1:] {
			switch {
			case d.record == nil && v.cost < 0 && smoothed[i].cost >= 0:
				d = deletion{record: r, rstart: v.ref + 1, qstart: v.query + 1}
			case d.record != nil && v.cost >= 0 && smoothed[i].cost < 0:
				d.rend = v.ref
				d.qend = v.query
				if d.rend-d.rstart >= min || d.qend-d.qstart >= min {
					gf.SeqName = d.record.Ref.Name()
					gf.FeatStrand = strandFor(d.record)
					if gf.FeatStrand == seq.Minus {
						len := d.record.Seq.Length
						d.qstart, d.qend = len-d.qend, len-d.qstart
					}

					// Adjust ends based on paired SW alignments.
					var refined bool
					d, refined, err = br.adjust(d)
					if err != nil && *verbose {
						log.Printf("failed alignment %s: %v", d.record.Name, err)
					}

					gf.FeatStart = d.rstart
					gf.FeatEnd = d.rend
					if gf.FeatStart == gf.FeatEnd {
						// This is disgusting garbage resulting from
						// GFF not allowing zero length features.
						gf.FeatEnd++
					}

					if refined {
						gf.FeatAttributes = gf.FeatAttributes[:2]
						gf.FeatAttributes[1].Value = strconv.Itoa(d.dup)
					} else {
						gf.FeatAttributes = gf.FeatAttributes[:1]
					}
					gf.FeatAttributes[0].Value = fmt.Sprintf("%s %d %d", d.record.Name, feat.ZeroToOne(d.qstart), d.qend)
					_, err = w.Write(gf)
					if err != nil {
						return err
					}
				}
				d.record = nil
			}
		}
	}
	return nil
}