예제 #1
0
// generate_binary_charset maps every byte of allowed_charset to a new byte by
// passing it through utils.Binary (width 8, last argument false) and then
// utils.Bits_to_byte. The result has the same length and order as the input.
func generate_binary_charset(allowed_charset []byte) []byte {
	converted := make([]byte, 0, len(allowed_charset))
	for idx := 0; idx < len(allowed_charset); idx++ {
		bits := utils.Binary(int(allowed_charset[idx]), 8, false)
		converted = append(converted, utils.Bits_to_byte(bits))
	}
	return converted
}
예제 #2
0
// generateValidCodes returns, in increasing order, every code in the
// half-open range [min_allowed_code, max_allowed_code) that survives the
// charset filter below.
//
// For each candidate the low 6 bits of the code are expanded with
// utils.Binary, the two bits 1, 0 are appended, and the resulting 8 bits are
// packed with utils.Bits_to_byte. The candidate is kept only when that byte
// occurs in allowed_charset.Binary.
func (encoder *HuffmanEncoder_V1) generateValidCodes(allowed_charset *charset.Charset) []byte {
	accepted := make([]byte, 0, 64)

	// inCharset reports whether candidate is one of the charset's Binary bytes.
	inCharset := func(candidate byte) bool {
		for _, permitted := range allowed_charset.Binary {
			if permitted == candidate {
				return true
			}
		}
		return false
	}

	for code := encoder.min_allowed_code; code < encoder.max_allowed_code; code++ {
		// 6 bits taken from the code, followed by the fixed trailing bits 1, 0.
		bits := append(utils.Binary(code&63, 6, true), 1, 0)
		if inCharset(utils.Bits_to_byte(bits)) {
			accepted = append(accepted, byte(code))
		}
	}
	return accepted
}
예제 #3
0
// GenerateHuffman assigns an 8-bit code from encoder.valid_codes to every
// distinct byte of data and derives the matching list of code lengths.
//
// The distinct bytes are sorted ascending and codes are assigned in strictly
// increasing order by a backtracking search (map_codes). The returned Huffman
// has Code_lengths set to the generated length list, Symbols set to the
// byte -> code map, and Trailer set to 0.
//
// Errors:
//   - "No data."      data is empty, so there is nothing to assign.
//   - "No result."    the search found no admissible assignment.
//   - "Invalid HLIT." the number of code lengths beyond 257 is 13, 14, 15 or
//     greater than 28.
//
// On error the returned Huffman is the zero value.
func (encoder *HuffmanEncoder_V1) GenerateHuffman(data []byte, allowed_charset *charset.Charset) (huffman Huffman, err error) {
	var h Huffman

	// map_codes extends codes until it has one entry per entry of symbols.
	// codes[i] is the code chosen for symbols[i]; valid_codes are the
	// candidates still available for the next symbol. It returns the completed
	// code list, or an error when no candidate leads to a full assignment.
	var map_codes func(symbols []int, codes []byte, valid_codes []byte, allowed_charset *charset.Charset) ([]byte, error)
	map_codes = func(symbols []int, codes []byte, valid_codes []byte, allowed_charset *charset.Charset) ([]byte, error) {
		//fmt.Printf("W map_codes valid_codes = %v\n", valid_codes)
		symbols_length := len(symbols)
		codes_length := len(codes)
		valid_length := len(valid_codes)

		// Every symbol has a code: done.
		if symbols_length == codes_length {
			return codes, nil
		}

		// Symbols remain but no candidate codes are left.
		if valid_length == 0 {
			return []byte{0}, errors.New("No result.")
		}

		prev_code := int(codes[codes_length-1])
		prev_symbol := symbols[codes_length-1]
		symbol := symbols[codes_length]

		// First upper bound: the code may not advance by more than the symbol
		// value advanced since the previous symbol.
		max_code_1 := prev_code + symbol - prev_symbol

		// Second upper bound: the candidate sitting (remaining symbols)
		// positions from the end of valid_codes, wrapping modulo its length.
		// Presumably this keeps enough larger candidates for the symbols that
		// are still unassigned.
		max_code_index := -(symbols_length - codes_length) % valid_length

		// Go's % keeps the sign of the dividend; normalise to [0, valid_length).
		if max_code_index < 0 {
			max_code_index += valid_length
		}

		max_code_2 := int(valid_codes[max_code_index])
		max_code := byte(0)

		// The effective bound is the smaller of the two.
		if max_code_1 < max_code_2 {
			max_code = byte(max_code_1)
		} else {
			max_code = byte(max_code_2)
		}

		reachable_codes := make([]byte, 0, valid_length)
		for _, code := range valid_codes {
			if code <= max_code {
				reachable_codes = append(reachable_codes, code)
			}
		}

		// Extra constraint for the symbol equal to the final byte of data: drop
		// the first two bits of the code's 8-bit expansion, append two zero
		// bits, and require the packed byte to be in the allowed charset.
		if byte(symbol) == data[len(data)-1] {
			reachable_codes_temp := make([]byte, 0, valid_length)
			for _, code := range reachable_codes {
				lsb := utils.Binary(int(code), 8, true)[2:]
				byte_code := append(lsb, []byte{0, 0}...)
				byte_code_byte := utils.Bits_to_byte(byte_code)

				for _, allowed := range (*allowed_charset).Binary {
					if allowed == byte_code_byte {
						reachable_codes_temp = append(reachable_codes_temp, code)
						break
					}
				}
			}
			// Replace the candidate list once, after filtering is complete.
			reachable_codes = reachable_codes_temp
		}

		//fmt.Printf("W reachable_codes = %v\n", reachable_codes)

		// Try the largest reachable code first and backtrack on failure.
		for i := len(reachable_codes) - 1; i >= 0; i-- {
			current_code := reachable_codes[i]
			next_codes := make([]byte, 0, valid_length)

			// Later symbols may only use codes strictly above the one chosen here.
			for _, valid := range valid_codes {
				if valid > current_code {
					next_codes = append(next_codes, valid)
				}
			}

			assigned_codes, err := map_codes(symbols, append(codes, current_code), next_codes, allowed_charset)
			if err == nil {
				return assigned_codes, nil
			}
		}

		return []byte{0}, errors.New("No result.")
	}

	fmt.Printf("GenerateHuffman(V1, data = %x)\n", data)

	// Without input there is no symbol to assign; the code below indexes
	// assigned_codes[0] and would panic.
	if len(data) == 0 {
		return h, errors.New("No data.")
	}

	min_allowed_code := (*encoder).min_allowed_code

	// Generate a sorted list of bytes in data
	sorted_bytes_set := make([]int, len(data))
	for i, b := range data {
		sorted_bytes_set[i] = int(b)
	}
	sorted_bytes_set = utils.RemoveDuplicates(sorted_bytes_set)
	sort.Ints(sorted_bytes_set)

	// Seed the search with a sentinel symbol -1 paired with code
	// min_allowed_code-1 so the first real symbol has a predecessor.
	assigned_codes, err := map_codes(append([]int{-1}, sorted_bytes_set...), []byte{byte(min_allowed_code - 1)}, (*encoder).valid_codes, allowed_charset)
	if err != nil {
		return h, err
	}

	// Drop the sentinel's code.
	assigned_codes = assigned_codes[1:]

	//fmt.Printf("W assigned_codes = %v\n", assigned_codes)

	symbols := make(map[byte]byte)
	for i := 0; i < len(sorted_bytes_set); i++ {
		symbols[byte(sorted_bytes_set[i])] = assigned_codes[i]
	}

	// slack_N counts how many additional entries of length N are still to be
	// emitted. slack_8 starts as the gap between the first assigned code and
	// min_allowed_code.
	slack_2 := 0
	slack_6 := 1
	slack_8 := int(assigned_codes[0]) - min_allowed_code
	code_lengths := make([]byte, 0, len(assigned_codes))

	// The index of each appended entry is the literal value it belongs to.
	// Continue until at least 257 entries exist and all slack is consumed.
	for len(code_lengths) < 257 || slack_2 > 0 || slack_6 > 0 || slack_8 > 0 {
		if len(sorted_bytes_set) > 0 && len(code_lengths) == sorted_bytes_set[0] {
			// This position is a byte that occurs in data: length 8, then
			// recompute slack_8 from the gap to the next assigned code.
			code_lengths = append(code_lengths, 8)
			current_code := assigned_codes[0]
			assigned_codes = assigned_codes[1:]
			sorted_bytes_set = sorted_bytes_set[1:]
			if len(assigned_codes) > 0 {
				slack_8 = int(assigned_codes[0]) - int(current_code) - 1
			} else {
				slack_8 = 124 - utils.CountOccurrencies(code_lengths, 8)
			}
		} else if len(code_lengths) == 256 {
			// Position 256 gets length 2 and schedules one more length-2 entry.
			code_lengths = append(code_lengths, 2)
			slack_2 = 1
		} else if slack_8 > 0 {
			code_lengths = append(code_lengths, 8)
			slack_8--
		} else if slack_6 > 0 {
			code_lengths = append(code_lengths, 6)
			slack_6--
		} else if slack_2 > 0 {
			code_lengths = append(code_lengths, 2)
			slack_2--
		} else {
			code_lengths = append(code_lengths, 0)
		}
	}

	// Check for HLIT: the number of code lengths beyond the first 257.
	// The loop above guarantees len(code_lengths) >= 257, so the difference
	// must be taken in this direction for the check to be able to fire.
	extra_code_lengths := len(code_lengths) - 257
	if (extra_code_lengths < 16 && extra_code_lengths > 12) || extra_code_lengths > 28 {
		return h, errors.New("Invalid HLIT.")
	}

	// fmt.Printf("W code_lengths = %v\nW symbols = %v\n", code_lengths, symbols)

	// Populate h
	h.Code_lengths = code_lengths
	h.Symbols = symbols
	h.Trailer = 0

	fmt.Println("Huffman found.")

	return h, nil
}
예제 #4
0
// CompressVariant writes one dynamic-Huffman block to the stream: the block
// header, the code-length-code table, the run-encoded literal code lengths
// taken from h.Code_lengths, a fixed distance section, the bytes of block
// translated through h.Symbols, and finally h.Trailer as 6 bits.
//
// is_last selects the BFINAL bit (1 when true, 0 otherwise).
//
// Only code lengths 0, 6 and 8 are emitted from h.Code_lengths; runs of any
// other length write nothing.
func (d *ZlibStream) CompressVariant(block []byte, h *huffman.Huffman, is_last bool) {
	/*
		Code-length-code lengths, in transmission order:

		+------+--------+
		| Code | Length |
		+------+--------+
		| 16   | 2      |
		| 17   | 4      |
		| 18   | 3      |
		| 0    | 4      |
		| 8    | 4      |
		| 7    | 5      |
		| 9    | 4      |
		| 6    | 4      |
		| 10   | 4      |
		| 5    | -      |
		| 11   | 3      |
		| 4    | 5      |
		| 12   | 4      |
		+------+--------+
	*/
	lenOfLen := []int{2, 4, 3, 4, 4, 5, 4, 4, 4, 0, 3, 5, 4}

	lengths := h.Code_lengths
	translate := h.Symbols

	// emitRun writes code `remaining` times. After the first write, whenever
	// more than 6 repetitions remain and the stream's ByteDisalignment is 2,
	// it writes the 4-bit pattern 0,0,1,1 once per group of 6 repetitions
	// instead of writing code 6 times.
	emitRun := func(code []byte, remaining int) {
		//fmt.Printf("W encode(%v, %v)\n", code, n)
		for started := false; remaining > 0; started = true {
			if started && remaining > 6 && d.ByteDisalignment() == 2 {
				for groups := remaining / 6; groups > 0; groups-- {
					d.WriteBits([]byte{0, 0, 1, 1})
					remaining -= 6
				}
				continue
			}
			d.WriteBits(code)
			remaining--
		}
	}

	//fmt.Printf("W BEGIN CompressVariant\n")

	// Header
	bfinal := byte(0)
	if is_last {
		bfinal = 1
	}
	d.WriteBits([]byte{bfinal})                           // BFINAL
	d.WriteBits([]byte{0, 1})                             // BTYPE = 10 - dynamic Huffman codes
	d.WriteBits(utils.Binary(len(lengths)-257, 5, false)) // HLIT
	d.WriteBits(utils.Binary(25, 5, false))               // HDIST
	d.WriteBits(utils.Binary(9, 4, false))                // HCLEN = len(lenOfLen) - 4 = 9

	// Table: lengths of the code-length codes.
	d.WriteLenOfLen(lenOfLen)

	// Collapse consecutive equal code lengths into (length, count) runs.
	runs := make([]Run, 0, len(lengths))
	for _, length := range lengths {
		if last := len(runs) - 1; last >= 0 && runs[last].code_length == length {
			runs[last].runs++
			continue
		}
		runs = append(runs, Run{code_length: length, runs: 1})
	}

	// Emit each run with the 4-bit code assigned to its length.
	for _, run := range runs {
		var code []byte
		switch run.code_length {
		case 0:
			code = []byte{1, 0, 0, 0}
		case 6:
			code = []byte{1, 0, 0, 1}
		case 8:
			code = []byte{1, 0, 1, 0}
		default:
			// No code is written for any other length.
			continue
		}
		emitRun(code, run.runs)
	}

	// Distance: depending on the current disalignment, either repeat zero 22x
	// directly, or write a single zero first and repeat zero 21x.
	zeroRepeat := 11 // REPEAT ZERO 22x
	if d.ByteDisalignment() != 2 {
		d.WriteBits([]byte{1, 0, 0, 0}) // H 0
		zeroRepeat = 10                 // REPEAT ZERO 21x
	}
	d.WriteBits([]byte{0, 1, 1})                    // H 18
	d.WriteBits(utils.Binary(zeroRepeat, 7, false)) // repeat count
	d.WriteBits([]byte{0, 0, 1, 0})                 // H 16 + REPEAT 4x

	// Data: each input byte is replaced by its mapped 8-bit code.
	for i := 0; i < len(block); i++ {
		mapped := translate[block[i]]
		d.WriteBits(utils.Binary(int(mapped), 8, true))
	}
	d.WriteBits(utils.Binary(int(h.Trailer), 6, true))

	//fmt.Printf("W END CompressVariant\n")
}
예제 #5
0
// WritePaddingBlock writes a fixed, non-final dynamic-Huffman block: header,
// code-length-code table, a hard-coded sequence of code-length runs, and an
// end-of-block bit pattern. It takes no input and its output depends only on
// the stream's current ByteDisalignment while the runs are being written.
func (d *ZlibStream) WritePaddingBlock() {
	/*
		Code-length-code lengths, in transmission order:

		+------+--------+
		| Code | Length |
		+------+--------+
		| 16   | 2      |
		| 17   | 5      |
		| 18   | -      |
		| 0    | 4      |
		| 8    | 3      |
		| 7    | -      |
		| 9    | 6      |
		| 6    | 4      |
		| 10   | 4      |
		| 5    | 4      |
		| 11   | 4      |
		| 4    | 6      |
		| 12   | 2      |
		+------+--------+
	*/
	lenOfLen := []int{2, 5, 0, 4, 3, 0, 6, 4, 4, 4, 4, 6, 2}

	// emitRun writes code `remaining` times. After the first write, whenever
	// more than 6 repetitions remain and ByteDisalignment is 0, it covers up
	// to 10 repetitions with two fixed-prefix groups (0,1 + 2 bits, then
	// 0,1,1,0) instead of writing code repeatedly.
	emitRun := func(code []byte, remaining int) {
		for started := false; remaining > 0; started = true {
			if !started || remaining <= 6 || d.ByteDisalignment() != 0 {
				d.WriteBits(code)
				remaining--
				continue
			}

			// Cover at most 10 repetitions per shortcut.
			batch := remaining
			if batch >= 10 {
				batch = 10
			}
			d.WriteBits([]byte{0, 1})
			d.WriteBits(utils.Binary(batch-7, 2, false))
			d.WriteBits([]byte{0, 1, 1, 0})
			remaining -= batch
		}
	}

	// Header
	d.WriteBits([]byte{0})                  // BFINAL
	d.WriteBits([]byte{0, 1})               // BTYPE = 10 - dynamic Huffman codes
	d.WriteBits(utils.Binary(8, 5, false))  // HLIT
	d.WriteBits(utils.Binary(16, 5, false)) // HDIST
	d.WriteBits(utils.Binary(9, 4, false))  // HCLEN = len(lenOfLen) - 4 = 9

	// Table: lengths of the code-length codes.
	d.WriteLenOfLen(lenOfLen)

	// Literal + lengths
	emitRun([]byte{1, 0, 1, 0}, 197)
	emitRun([]byte{1, 1, 0, 0}, 64)

	// The final 4 literal/length entries and the 17 distance entries use the
	// same code, and the specs allow combining them into a single run of 21.
	emitRun([]byte{1, 0, 1, 0}, 21)

	// End of block
	d.WriteBits([]byte{1, 1, 1, 0, 1, 1})
}
예제 #6
0
// WriteLenOfLen writes every entry of lengths to the stream, in order, each
// expanded with utils.Binary using a width of 3 and last argument false.
func (d *ZlibStream) WriteLenOfLen(lengths []int) {
	for i := 0; i < len(lengths); i++ {
		d.WriteBits(utils.Binary(lengths[i], 3, false))
	}
}