Esempio n. 1
0
// decodePrefix reads one prefix-coded run of symbols from zr.rd and returns
// them without the terminating EOF marker.
//
// numSyms is the caller's symbol count; it is adjusted by two to account for
// dropping the 0 symbol and adding the RUNA, RUNB, and EOF symbols. The EOF
// marker is the highest symbol of the adjusted alphabet.
//
// The input is consumed in this order: a 3-bit tree count, a 15-bit selector
// count, the selectors themselves (coded with decSel and then passed through
// move-to-front decoding), the prefix code definitions for every tree, and
// finally the symbols. A new selector, and therefore possibly a new tree, is
// taken every numBlockSyms symbols.
//
// The function panics with ErrCorrupt when the adjusted alphabet is too small
// to hold EOF, when the tree count is out of range, when a selector names a
// tree that does not exist, when the selectors run out before EOF is seen,
// when a symbol lies outside the alphabet, or when more than
// zr.level*blockSize symbols are produced.
func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) {
	// Net growth of two: the 0 symbol goes away, RUNA, RUNB, and EOF arrive.
	numSyms += 2
	if numSyms < 3 {
		panic(ErrCorrupt) // No room left for the EOF marker
	}
	eofSym := numSyms - 1

	// Header: how many trees exist and which tree each group uses.
	numTrees := int(zr.rd.ReadBitsBE64(3))
	if !(numTrees >= minNumTrees && numTrees <= maxNumTrees) {
		panic(ErrCorrupt)
	}
	selectors := make([]uint8, int(zr.rd.ReadBitsBE64(15)))
	for idx := 0; idx < len(selectors); idx++ {
		sel, ok := zr.rd.TryReadSymbol(&decSel)
		if !ok {
			sel = zr.rd.ReadSymbol(&decSel)
		}
		if int(sel) >= numTrees {
			panic(ErrCorrupt)
		}
		selectors[idx] = uint8(sel)
	}
	// Selectors are stored move-to-front encoded; undo that in place.
	var mtf internal.MoveToFront
	mtf.Decode(selectors)

	// Give every tree its own code table with symbols numbered in order,
	// then let the bit reader fill in the codes and build the decoders.
	var codeStore [maxNumTrees][maxNumSyms]prefix.PrefixCode
	var codeSets [maxNumTrees]prefix.PrefixCodes
	var decoders [maxNumTrees]prefix.Decoder
	for t := 0; t < numTrees; t++ {
		set := codeStore[t][:numSyms]
		for s := range set {
			set[s].Sym = uint32(s)
		}
		codeSets[t] = set
	}
	zr.rd.ReadPrefixCodes(codeSets[:numTrees], decoders[:numTrees])

	// Decode symbols until the EOF marker shows up.
	var active *prefix.Decoder
	var remaining, nextSel int
	for {
		if remaining == 0 {
			// Start of a new group: move on to the next selector.
			if nextSel >= len(selectors) {
				panic(ErrCorrupt)
			}
			active = &decoders[selectors[nextSel]]
			nextSel++
			remaining = numBlockSyms
		}
		remaining--

		// Try the fast path first and fall back to the general reader.
		sym, ok := zr.rd.TryReadSymbol(active)
		if !ok {
			sym = zr.rd.ReadSymbol(active)
		}

		switch v := int(sym); {
		case v == eofSym:
			return syms // EOF marker
		case v >= numSyms:
			panic(ErrCorrupt) // Symbol outside the alphabet
		case len(syms) >= zr.level*blockSize:
			panic(ErrCorrupt) // Block is too large
		}
		syms = append(syms, uint16(sym))
	}
}
Esempio n. 2
0
// encodePrefix writes syms to zw.wr as prefix-coded data, followed by an EOF
// marker.
//
// numSyms is the caller's symbol count; it is adjusted by two to account for
// dropping the 0 symbol and adding the RUNA, RUNB, and EOF symbols. The EOF
// marker is the highest symbol of the adjusted alphabet and is appended to
// syms before encoding.
// NOTE(review): because append is used, the marker may land in spare
// capacity of the caller's backing array.
//
// The output is produced in this order: a 3-bit tree count, a 15-bit selector
// count, the move-to-front encoded selectors (coded with encSel), the prefix
// code definitions for every tree, and finally the symbols. Symbols are
// handled in groups of numBlockSyms, and each group is coded with the tree
// named by its selector.
//
// The function panics with errInvalid when the adjusted alphabet is too small
// to hold EOF, and panics with the error returned by prefix.GenerateLengths
// if code length generation fails.
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) {
	// Net growth of two: the 0 symbol goes away, RUNA, RUNB, and EOF arrive.
	numSyms += 2
	if numSyms < 3 {
		panic(errInvalid) // No room left for the EOF marker
	}
	syms = append(syms, uint16(numSyms-1)) // EOF marker

	// Pick the tree count from the amount of data: more symbols, more trees.
	numTrees := maxNumTrees
	switch total := len(syms); {
	case total < 200:
		numTrees = minNumTrees
	case total < 600:
		numTrees = minNumTrees + 1
	case total < 1200:
		numTrees = minNumTrees + 2
	case total < 2400:
		numTrees = minNumTrees + 3
	}

	// One selector per group of numBlockSyms symbols (rounded up), handed
	// out to the trees in round-robin order.
	numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms
	treeSels := make([]uint8, numSels)
	for g := 0; g < len(treeSels); g++ {
		treeSels[g] = uint8(g % numTrees)
	}

	// Give every tree its own code table with symbols numbered in order.
	var codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
	var codes1D [maxNumTrees]prefix.PrefixCodes
	var trees1D [maxNumTrees]prefix.Encoder
	for t := 0; t < numTrees; t++ {
		table := codes2D[t][:numSyms]
		for s := range table {
			table[s].Sym = uint32(s)
		}
		codes1D[t] = table
	}

	// First cut at assigning prefix trees to each group: tally each group's
	// symbols into the table of the tree its selector points at.
	for start, sel := 0, 0; start < len(syms); start, sel = start+numBlockSyms, sel+1 {
		end := start + numBlockSyms
		if end > len(syms) {
			end = len(syms)
		}
		counts := codes2D[treeSels[sel]][:numSyms]
		for _, sym := range syms[start:end] {
			counts[sym].Cnt++
		}
	}

	// TODO(dsnet): Use K-means to cluster groups to each prefix tree.

	// Turn the tallies into code lengths. Lengths are generated from the
	// count-sorted table, which is then put back into symbol order.
	for _, table := range codes1D[:numTrees] {
		table.SortByCount()
		err := prefix.GenerateLengths(table, maxPrefixBits)
		if err != nil {
			panic(err)
		}
		table.SortBySymbol()
	}

	// Header: tree count, selector count, then the selectors themselves.
	// Move-to-front coding runs on a copy because the plain selectors are
	// still needed to choose trees below.
	var mtf internal.MoveToFront
	zw.wr.WriteBitsBE64(uint64(numTrees), 3)
	zw.wr.WriteBitsBE64(uint64(numSels), 15)
	mtfSels := make([]uint8, numSels)
	copy(mtfSels, treeSels)
	mtf.Encode(mtfSels)
	for i := 0; i < len(mtfSels); i++ {
		zw.wr.WriteSymbol(uint(mtfSels[i]), &encSel)
	}
	zw.wr.WritePrefixCodes(codes1D[:numTrees], trees1D[:numTrees])

	// Body: emit every group with the encoder its selector names.
	for start, sel := 0, 0; start < len(syms); start, sel = start+numBlockSyms, sel+1 {
		end := start + numBlockSyms
		if end > len(syms) {
			end = len(syms)
		}
		enc := &trees1D[treeSels[sel]]
		for _, sym := range syms[start:end] {
			// Try the fast path first and fall back to the general writer.
			if !zw.wr.TryWriteSymbol(uint(sym), enc) {
				zw.wr.WriteSymbol(uint(sym), enc)
			}
		}
	}
}