func TestDegenerateCodes(t *testing.T) { var vectors = []struct { input prefix.PrefixCodes output prefix.PrefixCodes }{{ input: []prefix.PrefixCode{ {Sym: 0, Len: 1}, }, output: []prefix.PrefixCode{ {Sym: 0, Len: 1, Val: 0}, // 0 {Sym: 258, Len: 1, Val: 1}, // 1 }, }, { input: []prefix.PrefixCode{ {Sym: 0, Len: 1}, {Sym: 1, Len: 1}, {Sym: 2, Len: 1}, }, output: []prefix.PrefixCode{ {Sym: 0, Len: 1, Val: 0}, // 0 {Sym: 1, Len: 1, Val: 1}, // 1 }, }, { input: []prefix.PrefixCode{ {Sym: 0, Len: 3}, {Sym: 1, Len: 4}, {Sym: 2, Len: 3}, }, output: []prefix.PrefixCode{ {Sym: 0, Len: 3, Val: 0}, // 000 {Sym: 1, Len: 4, Val: 2}, // 0010 {Sym: 2, Len: 3, Val: 4}, // 100 {Sym: 258, Len: 4, Val: 10}, // 1010 {Sym: 259, Len: 3, Val: 6}, // 110 {Sym: 260, Len: 1, Val: 1}, // 1 }, }, { input: []prefix.PrefixCode{ {Sym: 0, Len: 1}, {Sym: 1, Len: 3}, {Sym: 2, Len: 4}, {Sym: 3, Len: 3}, {Sym: 4, Len: 2}, }, output: []prefix.PrefixCode{ {Sym: 0, Len: 1, Val: 0}, // 0 {Sym: 1, Len: 3, Val: 3}, // 011 {Sym: 3, Len: 3, Val: 7}, // 111 {Sym: 4, Len: 2, Val: 1}, // 01 }, }} for i, v := range vectors { input := append(prefix.PrefixCodes(nil), v.input...) output := handleDegenerateCodes(input) if !reflect.DeepEqual(output, v.output) { t.Errorf("test %d, output mismatch:\ngot %v\nwant %v", i, output, v.output) } } }
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) { numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols if numSyms < 3 { panic(errInvalid) // Not possible to encode EOF marker } syms = append(syms, uint16(numSyms-1)) // EOF marker // Compute number of prefix trees needed. numTrees := maxNumTrees for i, lim := range []int{200, 600, 1200, 2400} { if len(syms) < lim { numTrees = minNumTrees + i break } } // Compute number of block selectors. numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms treeSels := make([]uint8, numSels) for i := range treeSels { treeSels[i] = uint8(i % numTrees) } // Initialize prefix codes. var codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode var codes1D [maxNumTrees]prefix.PrefixCodes var trees1D [maxNumTrees]prefix.Encoder for i := range codes2D[:numTrees] { pc := codes2D[i][:numSyms] for j := range pc { pc[j].Sym = uint32(j) } codes1D[i] = pc } // First cut at assigning prefix trees to each group. var codes prefix.PrefixCodes var blkLen, selIdx int for _, sym := range syms { if blkLen == 0 { blkLen = numBlockSyms codes = codes2D[treeSels[selIdx]][:numSyms] selIdx++ } blkLen-- codes[sym].Cnt++ } // TODO(dsnet): Use K-means to cluster groups to each prefix tree. // Generate lengths and prefixes based on symbol frequencies. for i := range trees1D[:numTrees] { pc := prefix.PrefixCodes(codes2D[i][:numSyms]) pc.SortByCount() if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil { panic(err) } pc.SortBySymbol() } // Write out information about the trees and tree selectors. var mtf internal.MoveToFront zw.wr.WriteBitsBE64(uint64(numTrees), 3) zw.wr.WriteBitsBE64(uint64(numSels), 15) treeSelsMTF := make([]uint8, numSels) copy(treeSelsMTF, treeSels) mtf.Encode(treeSelsMTF) for _, sym := range treeSelsMTF { zw.wr.WriteSymbol(uint(sym), &encSel) } zw.wr.WritePrefixCodes(codes1D[:numTrees], trees1D[:numTrees]) // Write out prefix encoded symbols of compressed data. var tree *prefix.Encoder blkLen, selIdx = 0, 0 for _, sym := range syms { if blkLen == 0 { blkLen = numBlockSyms tree = &trees1D[treeSels[selIdx]] selIdx++ } blkLen-- ok := zw.wr.TryWriteSymbol(uint(sym), tree) if !ok { zw.wr.WriteSymbol(uint(sym), tree) } } }