func (comdb *CompressedDB) writer() {
	var record []string
	var err error
	var cseq *CompressedSeq
	byteOffset := int64(0)
	buf := new(bytes.Buffer)

	var compressedWriter *bgzf.Writer
	if comdb.CompressedSource {
		// 0 indicates that bgzf should use GOMAXPROCS for compression.
		compressedWriter = bgzf.NewWriter(comdb.File, 0)
	}

	csvWriter := csv.NewWriter(buf)
	csvWriter.Comma = ','
	csvWriter.UseCRLF = false

	saved := make([]CompressedSeq, 0, 1000)
	nextIndex := comdb.NumSequences()

	// If we're appending to the index, set the byteOffset to be at the end
	// of the current compressed database.
	if comdb.indexSize > 0 {
		info, err := comdb.File.Stat()
		if err != nil {
			fmt.Fprintf(os.Stderr, "%s\n", err)
			os.Exit(1)
		}
		byteOffset = info.Size()
	}

	for possible := range comdb.writerChan {
		// We have to preserve the order of compressed sequences, so we don't
		// write anything until we have the next sequence that we expect.
		if possible.Id < nextIndex {
			panic(fmt.Sprintf("BUG: Next sequence expected is '%d', but "+
				"we have an earlier sequence: %d", nextIndex, possible.Id))
		}
		saved = append(saved, possible)

		cseq, saved = nextSeqToWrite(nextIndex, saved)
		for cseq != nil {
			// Reset the buffer so it's empty. We want it to only contain
			// the next record we're writing.
			buf.Reset()

			// Allocate memory for creating the next record.
			// A record is a sequence name followed by four-tuples of links:
			// (coarse-seq-id, coarse-start, coarse-end, diff).
			record = make([]string, 0, 1+4*len(cseq.Links))
			record = append(record, cseq.Name)
			for _, link := range cseq.Links {
				record = append(record,
					fmt.Sprintf("%d", link.CoarseSeqId),
					fmt.Sprintf("%d", link.CoarseStart),
					fmt.Sprintf("%d", link.CoarseEnd),
					link.Diff)
			}

			// Write the record to our *buffer* and flush it.
			if err = csvWriter.Write(record); err != nil {
				fmt.Fprintf(os.Stderr, "%s\n", err)
				os.Exit(1)
			}
			csvWriter.Flush()

			// Pass the bytes on to the compressed file.
			if comdb.CompressedSource {
				if _, err = compressedWriter.Write(buf.Bytes()); err != nil {
					fmt.Fprintf(os.Stderr, "%s\n", err)
					os.Exit(1)
				}
				compressedWriter.Flush()
				compressedWriter.Wait()
			} else {
				if _, err = comdb.File.Write(buf.Bytes()); err != nil {
					fmt.Fprintf(os.Stderr, "%s\n", err)
					os.Exit(1)
				}
			}

			// Now write the byte offset that points to the start of this
			// record.
			err = binary.Write(comdb.Index, binary.BigEndian, byteOffset)
			if err != nil {
				fmt.Fprintf(os.Stderr, "%s\n", err)
				os.Exit(1)
			}

			// Increment the byte offset to be at the end of this record.
			byteOffset += int64(buf.Len())

			nextIndex++
			cseq, saved = nextSeqToWrite(nextIndex, saved)
		}
	}
	if comdb.CompressedSource {
		compressedWriter.Close()
	}
	comdb.Index.Close()
	comdb.File.Close()
	comdb.writerDone <- struct{}{}
}
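For context, here is a minimal sketch of what the nextSeqToWrite helper used by the loop above might look like, inferred from its call sites; it assumes CompressedSeq carries an integer Id field (as the writer's ordering check implies) and is not necessarily the actual implementation.

// Hypothetical sketch: return the saved sequence whose Id equals
// nextIndex, removing it from saved, or nil if that sequence has not
// arrived yet. The writer loop calls this repeatedly so that buffered
// out-of-order sequences drain as soon as the gap is filled.
func nextSeqToWrite(
	nextIndex int,
	saved []CompressedSeq,
) (*CompressedSeq, []CompressedSeq) {
	for i := range saved {
		if saved[i].Id == nextIndex {
			cseq := saved[i]
			saved = append(saved[:i], saved[i+1:]...)
			return &cseq, saved
		}
	}
	return nil, saved
}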
func (s *S) TestIssue10(c *check.C) {
	for _, test := range issue10Tests {
		var buf bytes.Buffer

		// Write the set of words to a bgzf stream.
		w := bgzf.NewWriter(&buf, *conc)
		for _, wb := range test.words {
			w.Write([]byte(wb.word))
			if wb.flush {
				w.Flush()
			}
		}
		w.Close()

		for _, strategy := range []MergeStrategy{nil, adjacent} {
			if strategy != nil && !test.canSquash {
				continue
			}
			for _, clean := range []bool{false, true} {
				for _, truncFinal := range []bool{false, true} {
					if truncFinal && !test.canTrunc {
						continue
					}

					// Build an index into the words.
					r, err := bgzf.NewReader(bytes.NewReader(buf.Bytes()), *conc)
					c.Assert(err, check.Equals, nil)
					idx := make(map[string]bgzf.Chunk)
					for i, wb := range test.words {
						p := make([]byte, len(wb.word))
						n, err := r.Read(p)
						c.Assert(err, check.Equals, nil)
						c.Assert(string(p[:n]), check.Equals, wb.word)
						last := r.LastChunk()
						if !clean {
							// This simulates the index construction behaviour
							// that appears to be what is done by htslib. The
							// behaviour of bgzf is to elide seeks that will
							// not result in a productive read.
							if i != 0 && test.words[i-1].flush {
								last.Begin = idx[test.words[i-1].word].End
							}
						}
						idx[wb.word] = last
					}

					var chunks []bgzf.Chunk
					for _, w := range test.chunks {
						chunks = append(chunks, idx[w])
					}

					var want string
					if truncFinal {
						want = strings.Join(test.chunks[:len(test.chunks)-1], "")
						chunks[len(chunks)-2].End = chunks[len(chunks)-1].Begin
						chunks = chunks[:len(chunks)-1]
					} else {
						want = strings.Join(test.chunks, "")
					}

					if strategy != nil {
						chunks = strategy(chunks)
					}
					cr, err := NewChunkReader(r, chunks)
					c.Assert(err, check.Equals, nil)

					var got bytes.Buffer
					io.Copy(&got, cr)
					c.Check(got.String(), check.Equals, want,
						check.Commentf("clean=%t merge=%t trunc=%t chunks=%+v",
							clean, strategy != nil, truncFinal, chunks),
					)
				}
			}
		}
	}
}
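For reference, a plausible sketch of the adjacent MergeStrategy the test drives; it assumes MergeStrategy is func([]bgzf.Chunk) []bgzf.Chunk and that the input chunks are already in ascending virtual-offset order. This is a reconstruction from how the test uses it, not necessarily the package's actual implementation.

// Hypothetical sketch of adjacent: coalesce chunks whose virtual file
// offsets touch or overlap. bgzf.Offset orders first by File (the
// compressed-stream offset), then by Block (the offset within the
// uncompressed block).
func adjacent(chunks []bgzf.Chunk) []bgzf.Chunk {
	less := func(a, b bgzf.Offset) bool {
		return a.File < b.File || (a.File == b.File && a.Block < b.Block)
	}
	var merged []bgzf.Chunk
	for _, c := range chunks {
		if n := len(merged); n > 0 && !less(merged[n-1].End, c.Begin) {
			// c starts at or before the previous chunk's end: extend
			// that chunk rather than appending a new one.
			if less(merged[n-1].End, c.End) {
				merged[n-1].End = c.End
			}
			continue
		}
		merged = append(merged, c)
	}
	return merged
}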