Ejemplo n.º 1
0
// Write writes r to the BAM stream.
func (bw *Writer) Write(r *sam.Record) error {
	if len(r.Name) == 0 || len(r.Name) > 254 {
		return errors.New("bam: name absent or too long")
	}
	if r.Qual != nil && len(r.Qual) != r.Seq.Length {
		return errors.New("bam: sequence/quality length mismatch")
	}
	tags := buildAux(r.AuxFields)
	recLen := bamFixedRemainder +
		len(r.Name) + 1 + // Null terminated.
		len(r.Cigar)<<2 + // CigarOps are 4 bytes.
		len(r.Seq.Seq) +
		len(r.Qual) +
		len(tags)

	bw.buf.Reset()
	wb := errWriter{w: &bw.buf}
	bin := binaryWriter{w: &wb}

	// Write record header data.
	bin.writeInt32(int32(recLen))
	bin.writeInt32(int32(r.Ref.ID()))
	bin.writeInt32(int32(r.Pos))
	bin.writeUint8(byte(len(r.Name) + 1))
	bin.writeUint8(r.MapQ)
	bin.writeUint16(uint16(r.Bin())) //r.bin
	bin.writeUint16(uint16(len(r.Cigar)))
	bin.writeUint16(uint16(r.Flags))
	bin.writeInt32(int32(r.Seq.Length))
	bin.writeInt32(int32(r.MateRef.ID()))
	bin.writeInt32(int32(r.MatePos))
	bin.writeInt32(int32(r.TempLen))

	// Write variable length data.
	wb.Write(append([]byte(r.Name), 0))
	writeCigarOps(&bin, r.Cigar)
	wb.Write(doublets(r.Seq.Seq).Bytes())
	if r.Qual != nil {
		wb.Write(r.Qual)
	} else {
		for i := 0; i < r.Seq.Length; i++ {
			wb.WriteByte(0xff)
		}
	}
	wb.Write(tags)
	if wb.err != nil {
		return wb.err
	}

	_, err := bw.bg.Write(bw.buf.Bytes())
	return err
}
Ejemplo n.º 2
0
func (d *DiversityFilter) Diff(r *sam.Record, genome []byte) (diff, length int) {
	start := r.Start()
	end := r.End()
	if start < 0 || end > len(genome) {
		if *debug {
			text, err := r.MarshalSAM(sam.FlagDecimal)
			raiseError(err)
			log.Printf("acc: %s, genome length %d, read starts at %d and ends at %d: %s\n", r.Ref.Name(), len(genome), start, end, text)
		}
		length = 0
		return
	}
	refSeq := genome[start:end]
	diff = 0
	read := map2Ref(r)
	length = len(read)
	for i := 0; i < length; i++ {
		if read[i] != refSeq[i] {
			diff++
		}
	}
	return
}
Ejemplo n.º 3
0
func (d *DiversityFilter) filter(buf []*sam.Record, acc string, genome []byte) (out []*sam.Record, acc1 string, genome1 []byte) {
	fn := func(txn *lmdb.Txn) error {
		dbi, err := txn.OpenDBI("read", 0)
		if err != nil {
			return err
		}

		for _, r := range buf {
			key := []byte(r.Name)
			val, err := txn.Get(dbi, key)
			if err != nil {
				if lmdb.IsNotFound(err) {
					val, err = r.MarshalText()
					if err != nil {
						return err
					}
					err = txn.Put(dbi, key, val, 0)
					if err != nil {
						return err
					}
				} else {
					return err
				}
			} else {
				var mate *sam.Record = &sam.Record{}
				err := mate.UnmarshalText(val)
				raiseError(err)
				if r.Ref.Name() == mate.Ref.Name() {
					if acc != r.Ref.Name() {
						genome, err = d.findGenome(r, d.featureDB, "fna")
						raiseError(err)
						acc = r.Ref.Name()
						if *debug {
							log.Println(acc)
						}
					}

					diff1, len1 := d.Diff(r, genome)
					diff2, len2 := d.Diff(mate, genome)
					if len1 > 0 && len2 > 0 && float64(diff1+diff2)/float64(len1+len2) <= d.Cutoff {
						out = append(out, r)
						out = append(out, mate)
					} else {
						if *debug {
							log.Printf("%d, %d, %d, %d\n", diff1, diff2, len1, len2)
						}
					}
				}

				txn.Del(dbi, key, val)
			}

		}
		return nil
	}
retry:
	err := d.db.Update(fn)
	if lmdb.IsMapFull(err) {
		d.sizeDB *= 2
		err = d.db.SetMapSize(d.sizeDB)
		raiseError(err)
		goto retry
	}
	raiseError(err)

	genome1 = genome
	acc1 = acc
	return
}
Ejemplo n.º 4
0
// Read returns the next sam.Record in the BAM stream.
func (br *Reader) Read() (*sam.Record, error) {
	if br.c != nil && vOffset(br.r.LastChunk().End) >= vOffset(br.c.End) {
		return nil, io.EOF
	}

	r := errReader{r: br.r}
	bin := binaryReader{r: &r}

	// Read record header data.
	blockSize := int(bin.readInt32())
	r.n = 0 // The blocksize field is not included in the blocksize.

	// br.r.Chunk() is only valid after the call the Read(), so this
	// must come after the first read in the record.
	tx := br.r.Begin()
	defer func() {
		br.lastChunk = tx.End()
	}()

	var rec sam.Record

	refID := bin.readInt32()
	rec.Pos = int(bin.readInt32())
	nLen := bin.readUint8()
	rec.MapQ = bin.readUint8()
	_ = bin.readUint16()
	nCigar := bin.readUint16()
	rec.Flags = sam.Flags(bin.readUint16())
	lSeq := bin.readInt32()
	nextRefID := bin.readInt32()
	rec.MatePos = int(bin.readInt32())
	rec.TempLen = int(bin.readInt32())
	if r.err != nil {
		return nil, r.err
	}

	// Read variable length data.
	name := make([]byte, nLen)
	if nf, _ := r.Read(name); nf != int(nLen) {
		return nil, errors.New("bam: truncated record name")
	}
	rec.Name = string(name[:len(name)-1]) // The BAM spec indicates name is null terminated.

	rec.Cigar = readCigarOps(&bin, nCigar)
	if r.err != nil {
		return nil, r.err
	}

	seq := make(doublets, (lSeq+1)>>1)
	if nf, _ := r.Read(seq.Bytes()); nf != int((lSeq+1)>>1) {
		return nil, errors.New("bam: truncated sequence")
	}
	rec.Seq = sam.Seq{Length: int(lSeq), Seq: seq}

	rec.Qual = make([]byte, lSeq)
	if nf, _ := r.Read(rec.Qual); nf != int(lSeq) {
		return nil, errors.New("bam: truncated quality")
	}

	auxTags := make([]byte, blockSize-r.n)
	r.Read(auxTags)
	if r.n != blockSize {
		return nil, errors.New("bam: truncated auxilliary data")
	}
	rec.AuxFields = parseAux(auxTags)

	if r.err != nil {
		return nil, r.err
	}

	refs := int32(len(br.h.Refs()))
	if refID != -1 {
		if refID < -1 || refID >= refs {
			return nil, errors.New("bam: reference id out of range")
		}
		rec.Ref = br.h.Refs()[refID]
	}
	if nextRefID != -1 {
		if nextRefID < -1 || nextRefID >= refs {
			return nil, errors.New("bam: mate reference id out of range")
		}
		rec.MateRef = br.h.Refs()[nextRefID]
	}

	return &rec, nil
}
Ejemplo n.º 5
0
// Add records the SAM record as having being located at the given chunk.
func (i *Index) Add(r *sam.Record, c bgzf.Chunk) error {
	return i.idx.Add(r, uint32(r.Bin()), c, isPlaced(r), isMapped(r))
}