Example #1
0
// ReadBitsBE64 reads nb bits through pr.ReadBits and returns them as a uint64
// with their bit order reversed, occupying the low nb bits of the result.
//
// Up to 32 bits are fetched with a single ReadBits call and reversed with
// internal.ReverseUint32N (presumably a reversal of the low nb bits — confirm
// against the internal package). Wider requests are split into a 32-bit read
// followed by a read of the remaining nb-32 bits. The arithmetic below is only
// meaningful for nb <= 64.
func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 {
	if nb > 32 {
		// The two reads must happen in this order: the first 32 bits form
		// the upper word, the remainder forms the lower word.
		hi := internal.ReverseUint32(uint32(pr.ReadBits(32)))
		lo := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32)))

		// Only nb of the 64 assembled bits are wanted; shifting right
		// discards the 64-nb lowest bits of the combined word.
		return (uint64(hi)<<32 | uint64(lo)) >> (64 - nb)
	}
	return uint64(internal.ReverseUint32N(uint32(pr.ReadBits(nb)), nb))
}
Example #2
0
// WriteBitsBE64 writes the low nb bits of v through pw.WriteBits after
// reversing their bit order.
//
// Up to 32 bits are reversed with internal.ReverseUint32N (presumably a
// reversal of the low nb bits — confirm against the internal package) and
// emitted with one WriteBits call. Wider values are first shifted so that
// their nb bits sit at the top of the 64-bit word, then emitted as a full
// 32-bit write followed by a write of the remaining nb-32 bits. The arithmetic
// below is only meaningful for nb <= 64.
func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) {
	if nb > 32 {
		// Left-justify the value so the upper word is completely filled.
		aligned := v << (64 - nb)
		hi := internal.ReverseUint32(uint32(aligned >> 32))
		lo := internal.ReverseUint32(uint32(aligned))

		// Emit the upper word first, then whatever bits remain.
		pw.WriteBits(uint(hi), 32)
		pw.WriteBits(uint(lo), nb-32)
		return
	}
	rev := internal.ReverseUint32N(uint32(v), nb)
	pw.WriteBits(uint(rev), nb)
}
Example #3
0
// updateCRC folds the bytes of buf into the running checksum crc and returns
// the updated checksum. buf itself is never modified.
func updateCRC(crc uint32, buf []byte) uint32 {
	// bzip2's CRC-32 consumes each byte most-significant bit first, whereas
	// the standard library's IEEE implementation works the other way around.
	// Mapping every input byte through internal.ReverseLUT and passing the
	// checksum through internal.ReverseUint32 on the way in and out lets
	// crc32.Update do the actual work.
	var scratch [4096]byte
	sum := internal.ReverseUint32(crc)
	for off := 0; off < len(buf); {
		// Work on a bounded copy so the caller's bytes stay untouched.
		n := copy(scratch[:], buf[off:])
		chunk := scratch[:n]
		for i := range chunk {
			chunk[i] = internal.ReverseLUT[chunk[i]]
		}
		sum = crc32.Update(sum, crc32.IEEETable, chunk)
		off += n
	}
	return internal.ReverseUint32(sum)
}
Example #4
0
// handleDegenerateCodes converts a degenerate tree into a canonical tree.
//
// For example, when the input is an under-subscribed tree:
//	input:  []PrefixCode{
//		{Sym: 0, Len: 3},
//		{Sym: 1, Len: 4},
//		{Sym: 2, Len: 3},
//	}
//	output: []PrefixCode{
//		{Sym:   0, Len: 3, Val:  0}, //  000
//		{Sym:   1, Len: 4, Val:  2}, // 0010
//		{Sym:   2, Len: 3, Val:  4}, //  100
//		{Sym: 258, Len: 4, Val: 10}, // 1010
//		{Sym: 259, Len: 3, Val:  6}, //  110
//		{Sym: 260, Len: 1, Val:  1}, //    1
//	}
//
// For example, when the input is an over-subscribed tree:
//	input:  []PrefixCode{
//		{Sym: 0, Len: 1},
//		{Sym: 1, Len: 3},
//		{Sym: 2, Len: 4},
//		{Sym: 3, Len: 3},
//		{Sym: 4, Len: 2},
//	}
//	output: []PrefixCode{
//		{Sym: 0, Len: 1, Val: 0}, //   0
//		{Sym: 1, Len: 3, Val: 3}, // 011
//		{Sym: 3, Len: 3, Val: 7}, // 111
//		{Sym: 4, Len: 2, Val: 1}, //  01
//	}
//
// Only the Len field of each input code is read; Val and Sym of the output
// are assigned here. Bit patterns that terminate without mapping to an input
// code are emitted with placeholder symbols numbered from maxNumSyms upward.
//
// The result is built by appending to codes[:0], so the backing array of the
// slice passed in is overwritten; the result may be shorter or longer than
// the input.
func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes {
	// Since there is no formal definition for the BZip2 format, there is no
	// specification that says that the code lengths must form a complete
	// prefix tree (IE: it is neither over-subscribed nor under-subscribed).
	// Thus, the original C implementation becomes the reference for how prefix
	// decoding is done in these edge cases. Unfortunately, the C version does
	// not error when an invalid tree is used, but rather allows decoding to
	// continue and only errors if some bit pattern happens to cause an error.
	// Thus, it is possible for an invalid tree to end up decoding an input
	// "properly" so long as invalid bit patterns are not present. In order to
	// replicate this non-specified behavior, we use a ported version of the
	// C code to generate the codes as a valid canonical tree by substituting
	// invalid nodes with invalid symbols.
	//
	// ====================================================
	// This program, "bzip2", the associated library "libbzip2", and all
	// documentation, are copyright (C) 1996-2010 Julian R Seward.  All
	// rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions
	// are met:
	//
	// 1. Redistributions of source code must retain the above copyright
	//    notice, this list of conditions and the following disclaimer.
	//
	// 2. The origin of this software must not be misrepresented; you must
	//    not claim that you wrote the original software.  If you use this
	//    software in a product, an acknowledgment in the product
	//    documentation would be appreciated but is not required.
	//
	// 3. Altered source versions must be plainly marked as such, and must
	//    not be misrepresented as being the original software.
	//
	// 4. The name of the author may not be used to endorse or promote
	//    products derived from this software without specific prior written
	//    permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
	// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
	// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
	// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	//
	// Julian Seward, [email protected]
	// bzip2/libbzip2 version 1.0.6 of 6 September 2010
	// ====================================================

	// Decode tables shared by the closures below:
	//	limits[n]: largest n-bit value that getSymbol accepts at length n.
	//	bases[n]:  offset subtracted from an n-bit value to index perms.
	//	perms:     indices into codes, ordered by increasing code length
	//	           (input order is kept among codes of equal length).
	//	minLen:    shortest code length seen by createTables.
	//	maxLen:    longest code length seen by createTables.
	var (
		limits [maxPrefixBits + 2]int32
		bases  [maxPrefixBits + 2]int32
		perms  [maxNumSyms]int32
		minLen uint32 = maxPrefixBits
		maxLen uint32 = 0
	)

	// Outcomes reported by getSymbol for a candidate bit pattern:
	//	statusOkay:     the pattern maps to an entry of perms.
	//	statusInvalid:  the pattern terminates, but its index is outside perms.
	//	statusNeedBits: the pattern is shorter than the length being tested.
	//	statusMaxBits:  the length being tested grew past maxLen with no match.
	const (
		statusOkay = iota
		statusInvalid
		statusNeedBits
		statusMaxBits
	)

	// createTables is the BZ2_hbCreateDecodeTables function from the C code.
	var createTables = func(codes []prefix.PrefixCode) {
		// Find the range of code lengths in use.
		for _, c := range codes {
			if c.Len > maxLen {
				maxLen = c.Len
			}
			if c.Len < minLen {
				minLen = c.Len
			}
		}

		// Fill perms with code indices sorted by length.
		// NOTE(review): the slice index j is stored rather than c.Sym, and
		// exploreCode later uses it as the symbol; this appears to assume
		// codes[j].Sym == j — confirm against callers.
		var pp int
		for i := minLen; i <= maxLen; i++ {
			for j, c := range codes {
				if c.Len == i {
					perms[pp] = int32(j)
					pp++
				}
			}
		}

		// Count the codes of each length (stored one slot higher), then take
		// a running sum so that bases[i] is the number of codes shorter than
		// i bits.
		var vec int32
		for _, c := range codes {
			bases[c.Len+1]++
		}
		for i := 1; i < len(bases); i++ {
			bases[i] += bases[i-1]
		}

		// Walk the lengths in increasing order. vec advances by the number of
		// codes at each length and doubles when moving to the next length, so
		// limits[i] is the last value assigned at length i.
		for i := minLen; i <= maxLen; i++ {
			vec += bases[i+1] - bases[i]
			limits[i] = vec - 1
			vec <<= 1
		}

		// Rewrite bases so that (value - bases[length]) indexes into perms.
		for i := minLen + 1; i <= maxLen; i++ {
			bases[i] = ((limits[i-1] + 1) << 1) - bases[i]
		}
	}

	// getSymbol is the GET_MTF_VAL macro from the C code.
	//
	// It tests whether the c.Len-bit pattern c.Val terminates in the tables
	// built by createTables and returns the perms entry together with one of
	// the status constants; the first result is 0 unless the status is
	// statusOkay.
	var getSymbol = func(c prefix.PrefixCode) (uint32, int) {
		// Bits are consumed from the top of v by left shifts below, so the
		// reversal presumably moves the first code bit to the most
		// significant position — confirm against internal.ReverseUint32.
		v := internal.ReverseUint32(c.Val)
		n := c.Len

		// Start with the shortest length and take that many bits at once.
		zn := minLen
		if zn > n {
			return 0, statusNeedBits
		}
		zvec := int32(v >> (32 - zn))
		v <<= zn
		for {
			if zn > maxLen {
				return 0, statusMaxBits
			}
			if zvec <= limits[zn] {
				break
			}
			// No match at this length: pull in one more bit and retry.
			zn++
			if zn > n {
				return 0, statusNeedBits
			}
			zvec = (zvec << 1) | int32(v>>31)
			v <<= 1
		}
		// The pattern terminated; make sure the index falls inside perms.
		if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms {
			return 0, statusInvalid
		}
		return uint32(perms[zvec-bases[zn]]), statusOkay
	}

	// Step 1: Create the prefix trees using the C algorithm.
	createTables(codes)

	// Step 2: Starting with the shortest bit pattern, explore the whole tree.
	// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen).
	// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms).
	//
	// pcodes starts with one slot per possible symbol, indexed by symbol;
	// placeholder codes for invalid patterns are appended after those slots,
	// which is why the backing array has room for twice as many entries.
	var pcodesArr [2 * maxNumSyms]prefix.PrefixCode
	var pcodes = pcodesArr[:maxNumSyms]
	var exploreCode func(prefix.PrefixCode) bool
	exploreCode = func(c prefix.PrefixCode) (term bool) {
		sym, status := getSymbol(c)
		switch status {
		case statusOkay:
			// This code is valid, so insert it.
			c.Sym = sym
			pcodes[sym] = c
			term = true
		case statusInvalid:
			// This code is invalid, so insert an invalid symbol.
			c.Sym = uint32(len(pcodes))
			pcodes = append(pcodes, c)
			term = true
		case statusNeedBits:
			// This code is too short, so explore both children.
			// c0 extends the pattern with a 0 bit and c1 with a 1 bit; the
			// new bit is stored at position Len-1 of Val.
			c.Len++
			c0, c1 := c, c
			c1.Val |= 1 << (c.Len - 1)

			b0 := exploreCode(c0)
			b1 := exploreCode(c1)
			// If exactly one child terminated, record the other child as an
			// invalid symbol so that both branches of this node are covered.
			// If neither terminated, nothing is recorded here and the
			// failure is reported to the parent instead.
			switch {
			case !b0 && b1:
				c0.Sym = uint32(len(pcodes))
				pcodes = append(pcodes, c0)
			case !b1 && b0:
				c1.Sym = uint32(len(pcodes))
				pcodes = append(pcodes, c1)
			}
			term = b0 || b1
		case statusMaxBits:
			// This code is too long, so report it upstream.
			term = false
		}
		return term // Did this code terminate?
	}
	// The zero-valued code is the empty bit pattern, i.e. the root of the tree.
	exploreCode(prefix.PrefixCode{})

	// Step 3: Copy new sparse codes to old output codes.
	// Slots that were never assigned still have Len == 0 and are skipped.
	codes = codes[:0]
	for _, c := range pcodes {
		if c.Len > 0 {
			codes = append(codes, c)
		}
	}
	return codes
}