func (c *Compressor) BenchmarkBGZFReader(b *testing.B) {
	cr, err := bgzf.NewReader(c.w, 0)
	if err != nil {
		b.Fatal(err)
	}
	b.ResetTimer()
	_, err = io.Copy(ioutil.Discard, cr)
	if err != nil {
		b.Fatal(err)
	}
}
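// A minimal sketch of how the benchmark's fixture might be prepared; the
// Compressor type, its w field, and newCompressor are assumptions for
// illustration, not part of the original source. A payload is compressed
// with bgzf.Writer so that BenchmarkBGZFReader has a stream to read back.
type Compressor struct {
	w io.Reader // compressed BGZF stream consumed by the benchmark
}

func newCompressor(payload []byte) (*Compressor, error) {
	var buf bytes.Buffer
	w := bgzf.NewWriter(&buf, 1) // a single write worker is enough here
	if _, err := w.Write(payload); err != nil {
		return nil, err
	}
	if err := w.Close(); err != nil {
		return nil, err
	}
	return &Compressor{w: bytes.NewReader(buf.Bytes())}, nil
}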
// Test for issue #6 https://github.com/biogo/hts/issues/6
func (s *S) TestChunkReader(c *check.C) {
	br, err := bgzf.NewReader(bytes.NewReader(conceptualBAMdata), *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()
	cr, err := NewChunkReader(br, conceptualChunks)
	c.Assert(err, check.Equals, nil)
	defer cr.Close()

	// 2 is shorter than the length of the first block.
	// This panics prior to the fix.
	n, err := cr.Read(make([]byte, 2))
	c.Check(n, check.Equals, 2)
	c.Check(err, check.Equals, nil)
}
// newShort returns a new Bix that does as little work as possible,
// reusing the already-parsed index, VCF reader, and ref/alt columns
// from old while opening a fresh file handle and BGZF reader.
func newShort(old *Bix) (*Bix, error) {
	tbx := &Bix{
		Index:   old.Index,
		path:    old.path,
		VReader: old.VReader,
		refalt:  old.refalt,
	}
	var err error
	tbx.file, err = os.Open(tbx.path)
	if err != nil {
		return nil, err
	}
	tbx.bgzf, err = bgzf.NewReader(tbx.file, 1)
	if err != nil {
		return nil, err
	}
	return tbx, nil
}
// NewReader returns a new Reader using the given io.Reader
// and setting the read concurrency to rd. If rd is zero
// concurrency is set to GOMAXPROCS.
func NewReader(r io.Reader, rd int) (*Reader, error) {
	bg, err := bgzf.NewReader(r, rd)
	if err != nil {
		return nil, err
	}
	h, _ := sam.NewHeader(nil, nil)
	br := &Reader{
		r: bg,
		h: h,
	}
	err = br.h.DecodeBinary(br.r)
	if err != nil {
		return nil, err
	}
	br.lastChunk.End = br.r.LastChunk().End
	return br, nil
}
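// A minimal usage sketch for NewReader above, assuming it is the biogo/hts
// bam package so the returned Reader also provides Read and Close;
// printRecordNames and "example.bam" are hypothetical, for illustration only.
func printRecordNames(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	br, err := NewReader(f, 0) // 0: read concurrency defaults to GOMAXPROCS
	if err != nil {
		return err
	}
	defer br.Close()
	for {
		rec, err := br.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		fmt.Println(rec.Name)
	}
	return nil
}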
func (comdb *CompressedDB) ReadSeqFromCompressedSource(coarsedb *CoarseDB, orgSeqId int) (OriginalSeq, error) {
	if !comdb.CompressedSource {
		return OriginalSeq{}, fmt.Errorf("trying to read compressed source but database does not indicate that source files are compressed")
	}
	off, err := comdb.orgSeqOffset(orgSeqId)
	if err != nil {
		return OriginalSeq{}, err
	}
	block := off / bgzf.BlockSize
	posInBlock := off % bgzf.BlockSize
	offset := bgzf.Offset{
		// File: off,
		Block: uint16(block),
	}
	// I'm not convinced this is correct, but it's as close as I could gather from the bgzf tests.
	// Vprintf("Looking for seqid=%d using offset File=%d Block=%d posInBlock=%d\n", orgSeqId, offset.File, offset.Block, posInBlock)

	// Seek to the beginning of the file.
	if _, err := comdb.File.Seek(0, 0); err != nil {
		return OriginalSeq{}, err
	}
	// 0 indicates that bgzf should use GOMAXPROCS for decompression.
	compressedReader, err := bgzf.NewReader(comdb.File, 0)
	if err != nil {
		return OriginalSeq{}, err
	}
	err = compressedReader.Seek(offset)
	if err != nil {
		return OriginalSeq{}, err
	}
	blockData := make([]byte, bgzf.BlockSize)
	if _, err := compressedReader.Read(blockData); err != nil && err != io.EOF {
		return OriginalSeq{}, err
	}
	blockData = blockData[posInBlock:]
	blockReader := bytes.NewReader(blockData)
	oseq, err := comdb.ReadNextSeq(coarsedb, blockReader, orgSeqId)
	if err != nil {
		return oseq, fmt.Errorf("error reading next sequence: %s", err)
	} else if oseq.Id != orgSeqId {
		return oseq, fmt.Errorf("wanted to read sequence %d but read %d instead", orgSeqId, oseq.Id)
	}
	return oseq, nil
}
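// The offset arithmetic above is doubtful, as the original comment concedes:
// a bgzf.Offset is a virtual file offset in which File is the position of the
// block's first byte in the compressed stream and Block is the position within
// the uncompressed block, so it cannot be derived by dividing an uncompressed
// offset by bgzf.BlockSize. A sketch of the usual approach, recording the
// offset reported by LastChunk during a forward read and seeking back to it
// later; recordAndSeek is a hypothetical helper.
func recordAndSeek(r *bgzf.Reader) error {
	buf := make([]byte, 100)
	if _, err := r.Read(buf); err != nil {
		return err
	}
	// The next read begins where the last one ended.
	mark := r.LastChunk().End
	if _, err := r.Read(buf); err != nil {
		return err
	}
	// Return to the recorded virtual offset.
	return r.Seek(mark)
}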
// Test for issue #8 https://github.com/biogo/hts/issues/8
func (s *S) TestIssue8(c *check.C) {
	br, err := bgzf.NewReader(bytes.NewReader(conceptualBAMdata), *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()
	cr, err := NewChunkReader(br, conceptualChunks[:2])
	c.Assert(err, check.Equals, nil)
	defer cr.Close()
	var last []byte
	for {
		p := make([]byte, 1024)
		n, err := cr.Read(p)
		if n != 0 {
			c.Check(p[:n], check.Not(check.DeepEquals), last[:min(n, len(last))])
		}
		last = p
		if err != nil {
			if err == io.EOF {
				break
			}
			c.Fatalf("unexpected error: %v", err)
		}
	}
}
// New returns a new Bix for the file at path, which must have an
// accompanying .tbi index. An optional workers count sets the BGZF
// read concurrency (default 1).
func New(path string, workers ...int) (*Bix, error) {
	f, err := os.Open(path + ".tbi")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		return nil, err
	}
	defer gz.Close()

	idx, err := tabix.ReadFrom(gz)
	if err != nil {
		return nil, err
	}

	n := 1
	if len(workers) > 0 {
		n = workers[0]
	}

	b, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	bgz, err := bgzf.NewReader(b, n)
	if err != nil {
		return nil, err
	}

	var h []string
	tbx := &Bix{bgzf: bgz, path: path, file: b}

	buf := bufio.NewReader(bgz)
	l, err := buf.ReadString('\n')
	if err != nil {
		return tbx, err
	}

	// Collect header lines: either the first idx.Skip lines or all
	// lines beginning with the index's meta character.
	for i := 0; i < int(idx.Skip) || rune(l[0]) == idx.MetaChar; i++ {
		h = append(h, l)
		l, err = buf.ReadString('\n')
		if err != nil {
			return tbx, err
		}
	}
	header := strings.Join(h, "")

	if len(h) > 0 && strings.HasSuffix(tbx.path, ".vcf.gz") {
		var err error
		h := strings.NewReader(header)
		tbx.VReader, err = vcfgo.NewReader(h, true)
		if err != nil {
			return nil, err
		}
	} else if len(h) > 0 {
		htab := strings.Split(strings.TrimSpace(h[len(h)-1]), "\t")
		// Try to find ref and alternate columns to make an IREFALT.
		for i, hdr := range htab {
			if l := strings.ToLower(hdr); l == "ref" || l == "reference" {
				tbx.refalt = append(tbx.refalt, i)
				break
			}
		}
		for i, hdr := range htab {
			if l := strings.ToLower(hdr); l == "alt" || l == "alternate" {
				tbx.refalt = append(tbx.refalt, i)
				break
			}
		}
		if len(tbx.refalt) != 2 {
			tbx.refalt = nil
		}
	}
	tbx.buf = buf
	tbx.Index = idx
	return tbx, nil
}
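// A hedged sketch tying New and newShort together: open a Bix once, then
// clone it with newShort so a second consumer gets its own file handle and
// BGZF reader while sharing the already-parsed index; openPair and
// "example.vcf.gz" are hypothetical, for illustration only.
func openPair() (*Bix, *Bix, error) {
	first, err := New("example.vcf.gz", 1)
	if err != nil {
		return nil, nil, err
	}
	second, err := newShort(first) // shares Index/VReader, fresh os.File and bgzf.Reader
	if err != nil {
		return nil, nil, err
	}
	return first, second, nil
}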
// Test for issue #10 https://github.com/biogo/hts/issues/10
func (s *S) TestIssue10(c *check.C) {
	for _, test := range issue10Tests {
		var buf bytes.Buffer

		// Write the set of words to a bgzf stream.
		w := bgzf.NewWriter(&buf, *conc)
		for _, wb := range test.words {
			w.Write([]byte(wb.word))
			if wb.flush {
				w.Flush()
			}
		}
		w.Close()

		for _, strategy := range []MergeStrategy{nil, adjacent} {
			if strategy != nil && !test.canSquash {
				continue
			}
			for _, clean := range []bool{false, true} {
				for _, truncFinal := range []bool{false, true} {
					if truncFinal && !test.canTrunc {
						continue
					}

					// Build an index into the words.
					r, err := bgzf.NewReader(bytes.NewReader(buf.Bytes()), *conc)
					c.Assert(err, check.Equals, nil)
					idx := make(map[string]bgzf.Chunk)
					for i, wb := range test.words {
						p := make([]byte, len(wb.word))
						n, err := r.Read(p)
						c.Assert(err, check.Equals, nil)
						c.Assert(string(p[:n]), check.Equals, wb.word)
						last := r.LastChunk()
						if !clean {
							// This simulates the index construction behaviour
							// that appears to be what is done by htslib. The
							// behaviour of bgzf is to elide seeks that will not
							// result in a productive read.
							if i != 0 && test.words[i-1].flush {
								last.Begin = idx[test.words[i-1].word].End
							}
						}
						idx[wb.word] = last
					}

					var chunks []bgzf.Chunk
					for _, w := range test.chunks {
						chunks = append(chunks, idx[w])
					}

					var want string
					if truncFinal {
						want = strings.Join(test.chunks[:len(test.chunks)-1], "")
						chunks[len(chunks)-2].End = chunks[len(chunks)-1].Begin
						chunks = chunks[:len(chunks)-1]
					} else {
						want = strings.Join(test.chunks, "")
					}

					if strategy != nil {
						chunks = strategy(chunks)
					}
					cr, err := NewChunkReader(r, chunks)
					c.Assert(err, check.Equals, nil)

					var got bytes.Buffer
					io.Copy(&got, cr)
					c.Check(got.String(), check.Equals, want,
						check.Commentf("clean=%t merge=%t trunc=%t chunks=%+v",
							clean, strategy != nil, truncFinal, chunks),
					)
				}
			}
		}
	}
}