Ejemplo n.º 1
0
// TestDegenerateCodes runs handleDegenerateCodes over a table of prefix code
// sets and checks each result against the expected set.
//
// The table covers both kinds of incomplete input visible in the data: sets
// whose lengths leave part of the code space unused (the expected output gains
// extra symbols numbered from 258 upward) and sets whose lengths claim more
// than the whole code space (the expected output has codes removed).
func TestDegenerateCodes(t *testing.T) {
	type testCase struct {
		input  prefix.PrefixCodes
		output prefix.PrefixCodes
	}

	cases := []testCase{
		// A single 1-bit code: one extra symbol fills the other half.
		{
			input: prefix.PrefixCodes{
				{Sym: 0, Len: 1},
			},
			output: prefix.PrefixCodes{
				{Sym: 0, Len: 1, Val: 0},   // 0
				{Sym: 258, Len: 1, Val: 1}, // 1
			},
		},
		// Three 1-bit codes: only the first two are expected to survive.
		{
			input: prefix.PrefixCodes{
				{Sym: 0, Len: 1},
				{Sym: 1, Len: 1},
				{Sym: 2, Len: 1},
			},
			output: prefix.PrefixCodes{
				{Sym: 0, Len: 1, Val: 0}, // 0
				{Sym: 1, Len: 1, Val: 1}, // 1
			},
		},
		// Lengths 3, 4, 3 leave gaps: three extra symbols are expected.
		{
			input: prefix.PrefixCodes{
				{Sym: 0, Len: 3},
				{Sym: 1, Len: 4},
				{Sym: 2, Len: 3},
			},
			output: prefix.PrefixCodes{
				{Sym: 0, Len: 3, Val: 0},    //  000
				{Sym: 1, Len: 4, Val: 2},    // 0010
				{Sym: 2, Len: 3, Val: 4},    //  100
				{Sym: 258, Len: 4, Val: 10}, // 1010
				{Sym: 259, Len: 3, Val: 6},  //  110
				{Sym: 260, Len: 1, Val: 1},  //    1
			},
		},
		// Lengths 1, 3, 4, 3, 2 claim too much: symbol 2 is expected to go.
		{
			input: prefix.PrefixCodes{
				{Sym: 0, Len: 1},
				{Sym: 1, Len: 3},
				{Sym: 2, Len: 4},
				{Sym: 3, Len: 3},
				{Sym: 4, Len: 2},
			},
			output: prefix.PrefixCodes{
				{Sym: 0, Len: 1, Val: 0}, //   0
				{Sym: 1, Len: 3, Val: 3}, // 011
				{Sym: 3, Len: 3, Val: 7}, // 111
				{Sym: 4, Len: 2, Val: 1}, //  01
			},
		},
	}

	for idx, tc := range cases {
		// Hand the function a private copy of the input; presumably it may
		// modify or extend its argument, and the table should stay pristine.
		scratch := append(prefix.PrefixCodes(nil), tc.input...)
		got := handleDegenerateCodes(scratch)

		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("test %d, output mismatch:\ngot  %v\nwant %v", idx, got, tc.output)
	}
}
Ejemplo n.º 2
0
// encodePrefix writes syms to zw.wr in prefix-coded form. The output consists
// of, in order: the number of prefix trees (3 bits), the number of tree
// selectors (15 bits), the move-to-front encoded selectors, the prefix code
// tables, and finally every symbol encoded with the tree chosen for its group.
//
// numSyms is increased by 2 on entry, and the adjusted value minus one is
// appended to syms as the EOF marker. The function panics with errInvalid if
// the adjusted count is below 3, and panics with the error returned by
// prefix.GenerateLengths if code lengths cannot be generated.
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) {
	// Net change of +2: the 0 symbol is removed, while RUNA, RUNB, and EOF
	// symbols are added.
	numSyms += 2
	if numSyms < 3 {
		panic(errInvalid) // Not possible to encode EOF marker
	}
	eofSym := uint16(numSyms - 1)
	syms = append(syms, eofSym)

	// Choose how many prefix trees to use from the total symbol count.
	var numTrees int
	switch total := len(syms); {
	case total < 200:
		numTrees = minNumTrees
	case total < 600:
		numTrees = minNumTrees + 1
	case total < 1200:
		numTrees = minNumTrees + 2
	case total < 2400:
		numTrees = minNumTrees + 3
	default:
		numTrees = maxNumTrees
	}

	// One selector per group of numBlockSyms symbols (rounded up). Trees are
	// handed out to groups round-robin.
	numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms
	treeSels := make([]uint8, numSels)
	for sel := 0; sel < numSels; sel++ {
		treeSels[sel] = uint8(sel % numTrees)
	}

	// Set up one table of numSyms codes per tree, each entry labelled with its
	// own symbol value. codes1D holds slices into the codes2D storage.
	var (
		codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
		codes1D [maxNumTrees]prefix.PrefixCodes
		trees1D [maxNumTrees]prefix.Encoder
	)
	for t := 0; t < numTrees; t++ {
		table := codes2D[t][:numSyms]
		for s := range table {
			table[s].Sym = uint32(s)
		}
		codes1D[t] = table
	}

	// Count symbol occurrences into the table of the tree assigned to each
	// group; symbol i belongs to group i/numBlockSyms.
	for i, sym := range syms {
		codes1D[treeSels[i/numBlockSyms]][sym].Cnt++
	}

	// TODO(dsnet): Use K-means to cluster groups to each prefix tree.

	// Derive code lengths from the counts. GenerateLengths is called on the
	// count-sorted table, after which symbol order is restored.
	for t := 0; t < numTrees; t++ {
		table := codes1D[t]
		table.SortByCount()
		if err := prefix.GenerateLengths(table, maxPrefixBits); err != nil {
			panic(err)
		}
		table.SortBySymbol()
	}

	// Emit the header: tree count, selector count, selectors, code tables.
	zw.wr.WriteBitsBE64(uint64(numTrees), 3)
	zw.wr.WriteBitsBE64(uint64(numSels), 15)

	// Move-to-front encode a copy of the selectors; treeSels itself must keep
	// the raw tree indices because they are looked up again below.
	var mtf internal.MoveToFront
	treeSelsMTF := append([]uint8(nil), treeSels...)
	mtf.Encode(treeSelsMTF)
	for _, sel := range treeSelsMTF {
		zw.wr.WriteSymbol(uint(sel), &encSel)
	}
	zw.wr.WritePrefixCodes(codes1D[:numTrees], trees1D[:numTrees])

	// Emit the symbols themselves, each with the encoder of its group's tree.
	for i, sym := range syms {
		enc := &trees1D[treeSels[i/numBlockSyms]]
		// Fall back to WriteSymbol whenever TryWriteSymbol reports failure.
		if !zw.wr.TryWriteSymbol(uint(sym), enc) {
			zw.wr.WriteSymbol(uint(sym), enc)
		}
	}
}