func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) { numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols if numSyms < 3 { panic(ErrCorrupt) // Not possible to encode EOF marker } // Read information about the trees and tree selectors. var mtf internal.MoveToFront numTrees := int(zr.rd.ReadBitsBE64(3)) if numTrees < minNumTrees || numTrees > maxNumTrees { panic(ErrCorrupt) } numSels := int(zr.rd.ReadBitsBE64(15)) treeSels := make([]uint8, numSels) for i := range treeSels { sym, ok := zr.rd.TryReadSymbol(&decSel) if !ok { sym = zr.rd.ReadSymbol(&decSel) } if int(sym) >= numTrees { panic(ErrCorrupt) } treeSels[i] = uint8(sym) } mtf.Decode(treeSels) // Initialize prefix codes. var codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode var codes1D [maxNumTrees]prefix.PrefixCodes var trees1D [maxNumTrees]prefix.Decoder for i := range codes2D[:numTrees] { pc := codes2D[i][:numSyms] for j := range pc { pc[j].Sym = uint32(j) } codes1D[i] = pc } zr.rd.ReadPrefixCodes(codes1D[:numTrees], trees1D[:numTrees]) // Read prefix encoded symbols of compressed data. var tree *prefix.Decoder var blkLen, selIdx int for { if blkLen == 0 { blkLen = numBlockSyms if selIdx >= len(treeSels) { panic(ErrCorrupt) } tree = &trees1D[treeSels[selIdx]] selIdx++ } blkLen-- sym, ok := zr.rd.TryReadSymbol(tree) if !ok { sym = zr.rd.ReadSymbol(tree) } if int(sym) == numSyms-1 { break // EOF marker } if int(sym) >= numSyms { panic(ErrCorrupt) // Invalid symbol used } if len(syms) >= zr.level*blockSize { panic(ErrCorrupt) // Block is too large } syms = append(syms, uint16(sym)) } return syms }
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) { numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols if numSyms < 3 { panic(errInvalid) // Not possible to encode EOF marker } syms = append(syms, uint16(numSyms-1)) // EOF marker // Compute number of prefix trees needed. numTrees := maxNumTrees for i, lim := range []int{200, 600, 1200, 2400} { if len(syms) < lim { numTrees = minNumTrees + i break } } // Compute number of block selectors. numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms treeSels := make([]uint8, numSels) for i := range treeSels { treeSels[i] = uint8(i % numTrees) } // Initialize prefix codes. var codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode var codes1D [maxNumTrees]prefix.PrefixCodes var trees1D [maxNumTrees]prefix.Encoder for i := range codes2D[:numTrees] { pc := codes2D[i][:numSyms] for j := range pc { pc[j].Sym = uint32(j) } codes1D[i] = pc } // First cut at assigning prefix trees to each group. var codes prefix.PrefixCodes var blkLen, selIdx int for _, sym := range syms { if blkLen == 0 { blkLen = numBlockSyms codes = codes2D[treeSels[selIdx]][:numSyms] selIdx++ } blkLen-- codes[sym].Cnt++ } // TODO(dsnet): Use K-means to cluster groups to each prefix tree. // Generate lengths and prefixes based on symbol frequencies. for i := range trees1D[:numTrees] { pc := prefix.PrefixCodes(codes2D[i][:numSyms]) pc.SortByCount() if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil { panic(err) } pc.SortBySymbol() } // Write out information about the trees and tree selectors. var mtf internal.MoveToFront zw.wr.WriteBitsBE64(uint64(numTrees), 3) zw.wr.WriteBitsBE64(uint64(numSels), 15) treeSelsMTF := make([]uint8, numSels) copy(treeSelsMTF, treeSels) mtf.Encode(treeSelsMTF) for _, sym := range treeSelsMTF { zw.wr.WriteSymbol(uint(sym), &encSel) } zw.wr.WritePrefixCodes(codes1D[:numTrees], trees1D[:numTrees]) // Write out prefix encoded symbols of compressed data. var tree *prefix.Encoder blkLen, selIdx = 0, 0 for _, sym := range syms { if blkLen == 0 { blkLen = numBlockSyms tree = &trees1D[treeSels[selIdx]] selIdx++ } blkLen-- ok := zw.wr.TryWriteSymbol(uint(sym), tree) if !ok { zw.wr.WriteSymbol(uint(sym), tree) } } }