Beispiel #1
0
// TestRL2EncodeFullRange checks Encode with a symbol set that contains all
// 256 byte values and a source made up entirely of the byte 0xff.
func TestRL2EncodeFullRange(t *testing.T) {
	symbolSetData := make([]byte, 256)
	for i := range symbolSetData {
		symbolSetData[i] = byte(i)
	}

	// src is the result of MTF where each byte is used in reverse order,
	// so every position holds 0xff. Each one is expected to come out as
	// 0x0100 in the encoded output.
	src := make([]byte, 256)
	expected := make([]uint16, 257)
	for i := range src {
		src[i] = '\xff'
		expected[i] = '\u0100'
	}
	// The trailing expected symbol is 257 (presumably the end-of-block
	// marker; confirm against Encode).
	expected[len(expected)-1] = uint16(len(expected))

	_, reduced := symbols.Get(symbolSetData)
	dst := Encode(reduced, src)
	if len(dst) != len(expected) {
		// Stop immediately: the loop below indexes expected with dst's
		// indices and would panic if dst were longer than expected.
		t.Fatalf("RLE2 length %d doesn't match expected length %d", len(dst), len(expected))
	}

	for i, b := range dst {
		if b != expected[i] {
			t.Errorf("Value %d at index %d isn't the expected value %d", b, i, expected[i])
		}
	}
}
Beispiel #2
0
// TestMTFTransformAfterBWT runs Transform over the bytes "nnbaaa" and
// compares the result with the expected sequence of indices.
func TestMTFTransformAfterBWT(t *testing.T) {
	data := []byte("nnbaaa")
	_, reduced := symbols.Get(data)
	// The same slice is passed as destination and source, so the
	// transform overwrites data in place.
	Transform(reduced, data, data)

	const want = "\x02\x00\x02\x02\x00\x00"
	if got := string(data); got != want {
		t.Errorf("Output is incorrect: got %q, want %q", got, want)
	}
}
Beispiel #3
0
// TestMTFTransformOdd runs Transform over the seven-byte input "baanana"
// and compares the result with the expected sequence of indices.
func TestMTFTransformOdd(t *testing.T) {
	data := []byte("baanana")
	_, reduced := symbols.Get(data)
	// The same slice is passed as destination and source, so the
	// transform overwrites data in place.
	Transform(reduced, data, data)

	const want = "\x01\x01\x00\x02\x01\x01\x01"
	if got := string(data); got != want {
		t.Errorf("Output is incorrect: got %q, want %q", got, want)
	}
}
Beispiel #4
0
func TestFrequencies(t *testing.T) {
	data := []uint16{'\x03', '\x00', '\x03', '\x03', '\x00', '\x01', '\x04'}
	expectedFreqs := Frequencies{'\x00': 2, '\x01': 1, '\x03': 3, '\x04': 1}

	_, reduced := symbols.Get([]byte("banana"))
	freqs := GetFrequencies(reduced, data)
	for i, f := range freqs {
		if f != expectedFreqs[i] {
			t.Error("Frequency", i, "isn't the expected value", f, "should be", expectedFreqs[i])
		}
	}
}
Beispiel #5
0
// TestMTFTransformFullRange runs Transform in place over all 256 byte
// values in descending order (255, 254, ..., 0).
func TestMTFTransformFullRange(t *testing.T) {
	data := make([]byte, 256)
	for i := range data {
		data[i] = byte(255 - i)
	}

	_, reduced := symbols.Get(data)
	Transform(reduced, data, data)

	// With the symbols used in reverse order, each byte is looked up at
	// the last position of the move-to-front list, so every output byte
	// (not only the first) must be 0xff.
	for i, got := range data {
		if got != '\xff' {
			t.Fatalf("Output is incorrect at index %d: got %#x, want 0xff", i, got)
		}
	}
}
Beispiel #6
0
// BenchmarkRL2Encode times Encode over one million randomly generated
// bytes. Input generation and symbol collection happen before the timer
// is reset, so only the Encode calls are measured.
func BenchmarkRL2Encode(b *testing.B) {
	rand.Seed(time.Now().UnixNano())

	const size = 1000000
	input := make([]byte, size)
	for pos := 0; pos < size; pos++ {
		input[pos] = byte(rand.Intn(256))
	}
	_, reduced := symbols.Get(input)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		Encode(reduced, input)
	}
}
Beispiel #7
0
// BenchmarkMTFTransformLarge times Transform over six million randomly
// generated bytes, writing into a separate destination buffer of the
// same length. Setup happens before the timer is reset.
func BenchmarkMTFTransformLarge(b *testing.B) {
	rand.Seed(time.Now().UnixNano())

	const size = 1000000 * 6
	input := make([]byte, size)
	for pos := 0; pos < size; pos++ {
		input[pos] = byte(rand.Intn(256))
	}
	output := make([]byte, len(input))
	_, reduced := symbols.Get(input)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		Transform(reduced, output, input)
	}
}
Beispiel #8
0
// TestRL2EncodeShortRun checks Encode on a six-byte input that contains a
// single zero and a run of two zeros, using the symbol set of "banana".
func TestRL2EncodeShortRun(t *testing.T) {
	src := []byte("\x02\x00\x02\x02\x00\x00")
	expected := []byte("\x03\x00\x03\x03\x01\x04")

	_, reduced := symbols.Get([]byte("banana"))
	dst := Encode(reduced, src)
	if len(dst) != len(expected) {
		// Stop immediately: the loop below indexes expected with dst's
		// indices and would panic if dst were longer than expected.
		t.Fatalf("RLE2 length %d doesn't match expected length %d", len(dst), len(expected))
	}

	for i, b := range dst {
		if b != uint16(expected[i]) {
			t.Errorf("Value %d at index %d isn't the expected value %d", b, i, expected[i])
		}
	}
}
Beispiel #9
0
// TestGenerateTreesLowest calls GenerateTrees with a six-symbol input and
// checks that between two and six trees are produced together with
// exactly one tree selection.
func TestGenerateTreesLowest(t *testing.T) {
	_, reduced := symbols.Get([]byte("banana"))
	input := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'}

	trees, selections := GenerateTrees(rle2.GetFrequencies(reduced, input), input)

	// The two bounds are mutually exclusive, so at most one case fires.
	switch count := len(trees); {
	case count < 2:
		t.Error("Not enough huffman trees generated")
	case count > 6:
		t.Error("Too many huffman trees generated")
	}

	if got := len(selections); got != 1 {
		t.Error("The wrong number of huffman tree selections was returned")
	}
}
Beispiel #10
0
// TestTreeCodeLength builds a tree from the frequencies of a six-symbol
// input and checks that the smallest code length found in the tree is the
// length assigned to symbol 0x03, one of the most frequent input symbols.
func TestTreeCodeLength(t *testing.T) {
	_, reduced := symbols.Get([]byte("banana"))
	input := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'}
	tree := NewTree(rle2.GetFrequencies(reduced, input))

	// The entry at index 0 seeds the minimum; later entries replace it
	// only when their length is strictly smaller.
	shortest := 0
	for idx, entry := range tree.Codes {
		if idx != 0 && entry.Len >= shortest {
			continue
		}
		shortest = entry.Len
	}

	if tree.Codes['\x03'].Len != shortest {
		t.Error("The lowest code-length isn't the most used symbol")
	}
}
Beispiel #11
0
// TestGenerateTreesMultipleSelections calls GenerateTrees with an input of
// more than 200 symbols and checks that between two and six trees are
// produced together with exactly five tree selections.
func TestGenerateTreesMultipleSelections(t *testing.T) {
	_, reduced := symbols.Get([]byte("banana"))

	// Repeat the six-symbol pattern until the input exceeds 200 symbols.
	pattern := []uint16{'\x03', '\x00', '\x03', '\x00', '\x01', '\x04'}
	input := pattern
	for len(input) <= 200 {
		input = append(input, pattern...)
	}

	trees, selections := GenerateTrees(rle2.GetFrequencies(reduced, input), input)

	// The two bounds are mutually exclusive, so at most one case fires.
	switch count := len(trees); {
	case count < 2:
		t.Error("Not enough huffman trees generated")
	case count > 6:
		t.Error("Too many huffman trees generated")
	}

	if got := len(selections); got != 5 {
		t.Error("The wrong number of huffman tree selections was returned")
	}
}
Beispiel #12
0
// WriteBlock compresses the buffered content and writes it as a single
// block to bw. The encoded runs are passed through the BWT, MTF and RLE2
// steps and then huffman coded. Individual write results are not checked;
// the value of bw.Err() after the last write is returned.
func (b *block) WriteBlock(bw *bits.Writer) error {
	rleData := b.runs.Encode()
	syms, reducedSyms := symbols.Get(rleData)

	// BWT step.
	bwtData := make([]byte, len(rleData))
	bwtidx := bwt.Transform(bwtData, rleData)

	// MTF step: destination and source are the same buffer, so the BWT
	// output is overwritten with the MTF result.
	mtf.Transform(reducedSyms, bwtData, bwtData)
	mtfData := bwtData

	// RLE2 step, followed by the symbol frequencies of its output.
	rle2Data := rle2.Encode(reducedSyms, mtfData)
	freqs := rle2.GetFrequencies(reducedSyms, rle2Data)

	// Build the huffman trees used to encode rle2Data and the list that
	// says which tree applies to each group of symbols.
	trees, selections := huffman.GenerateTrees(freqs, rle2Data)

	// MTF encode the tree selections, using the tree indices themselves
	// as the symbol set.
	selectionSyms := make(symbols.ReducedSet, len(trees))
	for treeIdx := range trees {
		selectionSyms[treeIdx] = byte(treeIdx)
	}
	selectionBytes := make([]byte, len(selections))
	for pos := range selections {
		selectionBytes[pos] = byte(selections[pos])
	}
	mtf.Transform(selectionSyms, selectionBytes, selectionBytes)

	// Block header: magic, CRC and a single zero bit.
	bw.WriteBits(48, blockMagic)
	bw.WriteBits(32, uint64(b.crc))
	bw.WriteBits(1, 0)

	// Everything a decoder needs to rebuild the decoding steps.
	bw.WriteBits(24, uint64(bwtidx))
	b.writeSymbolBitmaps(bw, syms)
	bw.WriteBits(3, uint64(len(trees)))
	bw.WriteBits(15, uint64(len(selections)))
	b.writeTreeSelections(bw, selectionBytes)
	b.writeTreeCodes(bw, trees)

	// Emit the encoded contents, moving on to the next selected tree
	// after every huffman.TreeSelectionLimit symbols.
	group := 0
	tree := trees[selections[group]]
	for pos, sym := range rle2Data {
		if pos > 0 && pos%huffman.TreeSelectionLimit == 0 {
			group++
			tree = trees[selections[group]]
		}

		code := tree.Codes[sym]
		bw.WriteBits(uint(code.Len), code.Bits)
	}

	return bw.Err()
}