Example #1
0
// BenchmarkBGZFReader times draining a bgzf reader built over c.w into
// ioutil.Discard. Construction of the reader happens before the timer is
// reset, so only the copy is measured.
func (c *Compressor) BenchmarkBGZFReader(b *testing.B) {
	cr, err := bgzf.NewReader(c.w, 0)
	if err != nil {
		// Without this check a failed construction leaves cr nil and the
		// benchmark would crash inside io.Copy instead of reporting why.
		b.Fatal(err)
	}
	b.ResetTimer()

	if _, err := io.Copy(ioutil.Discard, cr); err != nil {
		b.Fatal(err)
	}
}
Example #2
0
// TestChunkReader is a regression test for issue #6,
// https://github.com/biogo/hts/issues/6
func (s *S) TestChunkReader(c *check.C) {
	src := bytes.NewReader(conceptualBAMdata)
	br, err := bgzf.NewReader(src, *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()

	cr, err := NewChunkReader(br, conceptualChunks)
	c.Assert(err, check.Equals, nil)
	defer cr.Close()

	// Request fewer bytes than the first block holds; a read this
	// short panicked before the fix.
	const want = 2
	buf := make([]byte, want)
	n, err := cr.Read(buf)
	c.Check(n, check.Equals, want)
	c.Check(err, check.Equals, nil)
}
Example #3
0
File: bix.go Project: brentp/bix
// newShort creates a new Bix that does as little work as possible by reusing
// the Index, path, VReader and refalt of old. Only the file handle and the
// bgzf reader (with a concurrency of 1) are created afresh.
//
// It returns a nil Bix and the error if the file cannot be opened or the bgzf
// reader cannot be constructed.
func newShort(old *Bix) (*Bix, error) {
	f, err := os.Open(old.path)
	if err != nil {
		return nil, err
	}
	bgz, err := bgzf.NewReader(f, 1)
	if err != nil {
		// The handle is not handed to the caller on failure, so close it
		// here rather than leaking the descriptor.
		f.Close()
		return nil, err
	}
	return &Bix{
		Index:   old.Index,
		path:    old.path,
		VReader: old.VReader,
		refalt:  old.refalt,
		file:    f,
		bgzf:    bgz,
	}, nil
}
Example #4
0
// NewReader wraps r in a bgzf reader with read concurrency rd and
// returns a Reader whose header has been decoded from the stream.
// A value of zero for rd sets the concurrency to GOMAXPROCS.
func NewReader(r io.Reader, rd int) (*Reader, error) {
	bg, err := bgzf.NewReader(r, rd)
	if err != nil {
		return nil, err
	}

	// The error result of sam.NewHeader is not checked.
	h, _ := sam.NewHeader(nil, nil)
	br := &Reader{r: bg, h: h}

	if err = br.h.DecodeBinary(br.r); err != nil {
		return nil, err
	}

	// Record where the header ended in the underlying bgzf stream.
	br.lastChunk.End = br.r.LastChunk().End
	return br, nil
}
Example #5
0
File: io.go Project: Rinoahu/MICA
// ReadSeqFromCompressedSource reads the original sequence with id orgSeqId
// from the bgzf-compressed source file held in comdb.File.
//
// The sequence offset obtained from comdb.orgSeqOffset is split into a
// quotient and remainder by bgzf.BlockSize; the quotient is used to seek the
// bgzf reader and the remainder is the position within the data read from
// there. The sequence is then decoded with comdb.ReadNextSeq.
//
// An error is returned if the database is not marked as having a compressed
// source, if the offset lookup, seek, reader construction or read fails, or
// if the sequence that was read does not have the requested id.
func (comdb *CompressedDB) ReadSeqFromCompressedSource(coarsedb *CoarseDB, orgSeqId int) (OriginalSeq, error) {
	if !comdb.CompressedSource {
		// The original built this error and dropped it; return it so the
		// guard actually stops the read.
		return OriginalSeq{}, fmt.Errorf("Trying to read compressed source but database does not indicate that source files are compressed")
	}

	// Check the lookup error before deriving anything from off.
	off, err := comdb.orgSeqOffset(orgSeqId)
	if err != nil {
		return OriginalSeq{}, err
	}
	block := off / bgzf.BlockSize
	posInBlock := off % bgzf.BlockSize

	offset := bgzf.Offset{
		//File:  off,
		Block: uint16(block),
	} // I'm not convinced this is correct but it's as close as I could gather from the bgzf tests

	//	Vprintf("Looking for seqid=%d using offset File=%d Block=%d posInBlock=%d\n", orgSeqId, offset.File, offset.Block, posInBlock)

	// Seek to the beginning of the file before handing it to bgzf.
	if _, err := comdb.File.Seek(0, 0); err != nil {
		return OriginalSeq{}, err
	}

	compressedReader, err := bgzf.NewReader(comdb.File, 0) // 0 indicates that bgzf should use GOMAXPROCS for decompression
	if err != nil {
		return OriginalSeq{}, err
	}
	// Release the reader's resources once this call is done; comdb.File
	// itself stays owned by comdb.
	defer compressedReader.Close()

	if err := compressedReader.Seek(offset); err != nil {
		return OriginalSeq{}, err
	}

	blockData := make([]byte, bgzf.BlockSize)
	// A read that yields data may still carry an error (e.g. end of stream),
	// so only fail when nothing at all was read.
	if n, err := compressedReader.Read(blockData); n == 0 && err != nil {
		return OriginalSeq{}, err
	}
	blockData = blockData[posInBlock:]

	oseq, err := comdb.ReadNextSeq(coarsedb, bytes.NewReader(blockData), orgSeqId)
	if err != nil {
		return oseq, fmt.Errorf("Error reading next sequence: %s\n", err)
	}
	if oseq.Id != orgSeqId {
		return oseq, fmt.Errorf("Wanted to read sequence %d but read %d instead\n", orgSeqId, oseq.Id)
	}

	return oseq, nil
}
Example #6
0
// TestIssue8 is a regression test for issue #8,
// https://github.com/biogo/hts/issues/8
//
// It reads the first two conceptual chunks in 1024 byte requests and checks
// that each non-empty read differs from the corresponding prefix of the
// buffer used for the previous read.
func (s *S) TestIssue8(c *check.C) {
	src := bytes.NewReader(conceptualBAMdata)
	br, err := bgzf.NewReader(src, *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()

	cr, err := NewChunkReader(br, conceptualChunks[:2])
	c.Assert(err, check.Equals, nil)
	defer cr.Close()

	var prev []byte
	for {
		// A fresh buffer per iteration keeps prev from aliasing the
		// data being read now.
		buf := make([]byte, 1024)
		n, err := cr.Read(buf)
		if n != 0 {
			overlap := min(n, len(prev))
			c.Check(buf[:n], check.Not(check.DeepEquals), prev[:overlap])
		}
		prev = buf

		if err == io.EOF {
			break
		}
		if err != nil {
			c.Fatalf("unexpected error: %v", err)
		}
	}
}
Example #7
0
File: bix.go Project: brentp/bix
// New returns a *Bix for the bgzf-compressed file at path, using the tabix
// index read from path + ".tbi".
//
// The first element of workers, if given, is passed to bgzf.NewReader as the
// read concurrency; otherwise a concurrency of 1 is used.
//
// Header lines (the first idx.Skip lines plus any following lines starting
// with idx.MetaChar) are collected. For paths ending in ".vcf.gz" with a
// non-empty header, the header is parsed with vcfgo.NewReader. For other files
// with a non-empty header, the last header line is split on tabs to look for
// ref/alt columns.
//
// If reading a line from the data file fails, the partially initialised Bix
// is returned together with the error. For all other failures the returned
// Bix is nil and any file opened here has been closed.
func New(path string, workers ...int) (*Bix, error) {
	f, err := os.Open(path + ".tbi")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		return nil, err
	}
	defer gz.Close()

	idx, err := tabix.ReadFrom(gz)
	if err != nil {
		return nil, err
	}
	n := 1
	if len(workers) > 0 {
		n = workers[0]
	}

	b, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	bgz, err := bgzf.NewReader(b, n)
	if err != nil {
		// Nothing else holds the data file yet; close it so the
		// descriptor is not leaked.
		b.Close()
		return nil, err
	}

	var h []string
	tbx := &Bix{bgzf: bgz, path: path, file: b}

	buf := bufio.NewReader(bgz)
	l, err := buf.ReadString('\n')
	if err != nil {
		return tbx, err
	}

	// Consume header lines: keep going while still inside the skipped
	// prefix or while the current line starts with the meta character.
	for i := 0; i < int(idx.Skip) || rune(l[0]) == idx.MetaChar; i++ {
		h = append(h, l)
		l, err = buf.ReadString('\n')
		if err != nil {
			return tbx, err
		}
	}
	header := strings.Join(h, "")

	if len(h) > 0 && strings.HasSuffix(tbx.path, ".vcf.gz") {
		hr := strings.NewReader(header)

		tbx.VReader, err = vcfgo.NewReader(hr, true)
		if err != nil {
			// tbx is discarded on this path, so release the reader
			// and file it would otherwise have owned.
			bgz.Close()
			b.Close()
			return nil, err
		}
	} else if len(h) > 0 {
		htab := strings.Split(strings.TrimSpace(h[len(h)-1]), "\t")
		// try to find ref and alternate columns to make an IREFALT
		for i, hdr := range htab {
			if l := strings.ToLower(hdr); l == "ref" || l == "reference" {
				tbx.refalt = append(tbx.refalt, i)
				break
			}
		}
		for i, hdr := range htab {
			if l := strings.ToLower(hdr); l == "alt" || l == "alternate" {
				tbx.refalt = append(tbx.refalt, i)
				break
			}
		}
		// Both columns are needed; a lone ref or alt index is dropped.
		if len(tbx.refalt) != 2 {
			tbx.refalt = nil
		}
	}
	tbx.buf = buf
	tbx.Index = idx
	return tbx, nil
}
Example #8
0
// TestIssue10 writes the words of each case in issue10Tests to an in-memory
// bgzf stream, builds a word-to-chunk index by reading the stream back, and
// checks that a ChunkReader over the chunks named by the case yields the
// concatenation of the corresponding words.
//
// Each case is exercised with and without a merge strategy, with and without
// the "clean" index construction, and with and without truncation of the
// final chunk, subject to the case's canSquash and canTrunc flags.
func (s *S) TestIssue10(c *check.C) {
	for _, test := range issue10Tests {
		var buf bytes.Buffer

		// Write the set of words to a bgzf stream.
		// Errors from Write, Flush and Close are not checked here.
		w := bgzf.NewWriter(&buf, *conc)
		for _, wb := range test.words {
			w.Write([]byte(wb.word))
			if wb.flush {
				w.Flush()
			}
		}
		w.Close()

		// A nil strategy means the chunks are used unmerged; the adjacent
		// strategy is only tried for cases marked canSquash.
		for _, strategy := range []MergeStrategy{nil, adjacent} {
			if strategy != nil && !test.canSquash {
				continue
			}
			for _, clean := range []bool{false, true} {
				for _, truncFinal := range []bool{false, true} {
					if truncFinal && !test.canTrunc {
						continue
					}
					// Build an index into the words.
					r, err := bgzf.NewReader(bytes.NewReader(buf.Bytes()), *conc)
					c.Assert(err, check.Equals, nil)
					idx := make(map[string]bgzf.Chunk)
					for i, wb := range test.words {
						// Read exactly one word and record the chunk the
						// reader reports for that read.
						p := make([]byte, len(wb.word))
						n, err := r.Read(p)
						c.Assert(err, check.Equals, nil)
						c.Assert(string(p[:n]), check.Equals, wb.word)

						last := r.LastChunk()
						if !clean {
							// This simulates the index construction behaviour
							// that appears to be what is done by htslib. The
							// behaviour of bgzf is to elide seeks that will not
							// result in a productive read.
							if i != 0 && test.words[i-1].flush {
								last.Begin = idx[test.words[i-1].word].End
							}
						}
						idx[wb.word] = last
					}

					// Look up the chunk for each word the case asks for.
					var chunks []bgzf.Chunk
					for _, w := range test.chunks {
						chunks = append(chunks, idx[w])
					}
					var want string
					if truncFinal {
						// Drop the final chunk and end the preceding chunk
						// where the dropped one began.
						want = strings.Join(test.chunks[:len(test.chunks)-1], "")
						chunks[len(chunks)-2].End = chunks[len(chunks)-1].Begin
						chunks = chunks[:len(chunks)-1]
					} else {
						want = strings.Join(test.chunks, "")
					}

					if strategy != nil {
						chunks = strategy(chunks)
					}
					// The same reader used to build the index is reused as
					// the source for the ChunkReader.
					cr, err := NewChunkReader(r, chunks)
					c.Assert(err, check.Equals, nil)

					// The error from io.Copy is not checked; only the bytes
					// that were read are compared against want.
					var got bytes.Buffer
					io.Copy(&got, cr)
					c.Check(got.String(), check.Equals, want,
						check.Commentf("clean=%t merge=%t trunc=%t chunks=%+v", clean, strategy != nil, truncFinal, chunks),
					)
				}
			}
		}
	}
}