func TestRL2EncodeFullRange(t *testing.T) { symbolSetData := make([]byte, 256) for i := range symbolSetData { symbolSetData[i] = byte(i) } // src is the results of MTF where each byte is used in reverse order. src := make([]byte, 256) expected := make([]uint16, 257) for i := range src { src[i] = '\xff' expected[i] = '\u0100' } expected[len(expected)-1] = uint16(len(expected)) _, reduced := symbols.Get(symbolSetData) dst := Encode(reduced, src) if len(dst) != len(expected) { t.Error("RLE2 length doesn't match expected length") } for i, b := range dst { if b != expected[i] { t.Error("Value", int(b), "isn't the expected value", int(expected[i])) } } }
func TestMTFTransformAfterBWT(t *testing.T) { data := []byte("nnbaaa") _, reduced := symbols.Get(data) Transform(reduced, data, data) if string(data) != "\x02\x00\x02\x02\x00\x00" { t.Error("Output is incorrect") } }
func TestMTFTransformOdd(t *testing.T) { data := []byte("baanana") _, reduced := symbols.Get(data) Transform(reduced, data, data) if string(data) != "\x01\x01\x00\x02\x01\x01\x01" { t.Error("Output is incorrect") } }
func TestFrequencies(t *testing.T) { data := []uint16{'\x03', '\x00', '\x03', '\x03', '\x00', '\x01', '\x04'} expectedFreqs := Frequencies{'\x00': 2, '\x01': 1, '\x03': 3, '\x04': 1} _, reduced := symbols.Get([]byte("banana")) freqs := GetFrequencies(reduced, data) for i, f := range freqs { if f != expectedFreqs[i] { t.Error("Frequency", i, "isn't the expected value", f, "should be", expectedFreqs[i]) } } }
func TestMTFTransformFullRange(t *testing.T) { data := make([]byte, 256) for i := range data { data[i] = byte(255 - i) } _, reduced := symbols.Get(data) Transform(reduced, data, data) if data[0] != '\xff' { t.Error("Output is incorrect") } }
func BenchmarkRL2Encode(b *testing.B) { rand.Seed(time.Now().UnixNano()) src := make([]byte, 1000000) for i := range src { src[i] = byte(rand.Intn(256)) } _, reduced := symbols.Get(src) b.ResetTimer() for i := 0; i < b.N; i++ { Encode(reduced, src) } }
func BenchmarkMTFTransformLarge(b *testing.B) { rand.Seed(time.Now().UnixNano()) src := make([]byte, 1000000*6) dst := make([]byte, len(src)) for i := range src { src[i] = byte(rand.Intn(256)) } _, reduced := symbols.Get(src) b.ResetTimer() for i := 0; i < b.N; i++ { Transform(reduced, dst, src) } }
func TestRL2EncodeShortRun(t *testing.T) { src := []byte("\x02\x00\x02\x02\x00\x00") expected := []byte("\x03\x00\x03\x03\x01\x04") _, reduced := symbols.Get([]byte("banana")) dst := Encode(reduced, src) if len(dst) != len(expected) { t.Error("RLE2 length doesn't match expected length") } for i, b := range dst { if b != uint16(expected[i]) { t.Error("Value", int(b), "isn't the expected value", int(expected[i])) } } }
func TestGenerateTreesLowest(t *testing.T) { _, reduced := symbols.Get([]byte("banana")) data := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'} freqs := rle2.GetFrequencies(reduced, data) trees, selections := GenerateTrees(freqs, data) if len(trees) < 2 { t.Error("Not enough huffman trees generated") } if len(trees) > 6 { t.Error("Too many huffman trees generated") } if len(selections) != 1 { t.Error("The wrong number of huffman tree selections was returned") } }
func TestTreeCodeLength(t *testing.T) { _, reduced := symbols.Get([]byte("banana")) data := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'} freqs := rle2.GetFrequencies(reduced, data) lowestlen := 0 tree := NewTree(freqs) for i, code := range tree.Codes { if i == 0 || code.Len < lowestlen { lowestlen = code.Len } } if lowestlen != tree.Codes['\x03'].Len { t.Error("The lowest code-length isn't the most used symbol") } }
func TestGenerateTreesMultipleSelections(t *testing.T) { _, reduced := symbols.Get([]byte("banana")) data := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'} base := data for len(data) <= 200 { data = append(data, base...) } freqs := rle2.GetFrequencies(reduced, data) trees, selections := GenerateTrees(freqs, data) if len(trees) < 2 { t.Error("Not enough huffman trees generated") } if len(trees) > 6 { t.Error("Too many huffman trees generated") } if len(selections) != 5 { t.Error("The wrong number of huffman tree selections was returned") } }
// WriteBlock compresses the content buffered and writes // a block to the bit writer given. func (b *block) WriteBlock(bw *bits.Writer) error { rleData := b.runs.Encode() syms, reducedSyms := symbols.Get(rleData) // BWT step. bwtData := make([]byte, len(rleData)) bwtidx := bwt.Transform(bwtData, rleData) // MTF step. mtfData := bwtData mtf.Transform(reducedSyms, mtfData, bwtData) // RLE2 step. rle2Data := rle2.Encode(reducedSyms, mtfData) freqs := rle2.GetFrequencies(reducedSyms, rle2Data) // Setup the huffman trees required to encode rle2Data. trees, selections := huffman.GenerateTrees(freqs, rle2Data) // Get the MTF encoded huffman tree selections. treeSelectionSymbols := make(symbols.ReducedSet, len(trees)) for i := range trees { treeSelectionSymbols[i] = byte(i) } treeSelectionBytes := make([]byte, len(selections)) for i, selection := range selections { treeSelectionBytes[i] = byte(selection) } mtf.Transform(treeSelectionSymbols, treeSelectionBytes, treeSelectionBytes) // Write the block header. bw.WriteBits(48, blockMagic) bw.WriteBits(32, uint64(b.crc)) bw.WriteBits(1, 0) // Write the contents that build the decoding steps. bw.WriteBits(24, uint64(bwtidx)) b.writeSymbolBitmaps(bw, syms) bw.WriteBits(3, uint64(len(trees))) bw.WriteBits(15, uint64(len(selections))) b.writeTreeSelections(bw, treeSelectionBytes) b.writeTreeCodes(bw, trees) // Write the encoded contents, using the huffman trees generated // switching them out every 50 symbols. encoded := 0 idx := 0 tree := trees[selections[idx]] for _, b := range rle2Data { if encoded == huffman.TreeSelectionLimit { encoded = 0 idx++ tree = trees[selections[idx]] } code := tree.Codes[b] bw.WriteBits(uint(code.Len), code.Bits) encoded++ } return bw.Err() }