func generate_binary_charset(allowed_charset []byte) []byte { binary_charset := make([]byte, len(allowed_charset)) for i, allowed := range allowed_charset { binary := utils.Binary(int(allowed), 8, false) binary_charset[i] = utils.Bits_to_byte(binary) } return binary_charset }
func (encoder *HuffmanEncoder_V1) generateValidCodes(allowed_charset *charset.Charset) []byte { valid_codes := make([]byte, 0, 64) for code := encoder.min_allowed_code; code < encoder.max_allowed_code; code++ { // Take 6 bits and add trailing 10 code_trimmed_and_appended := utils.Binary(code&63, 6, true) code_trimmed_and_appended = append(code_trimmed_and_appended, []byte{1, 0}...) code_byte := utils.Bits_to_byte(code_trimmed_and_appended) for _, allowed := range (*allowed_charset).Binary { if code_byte == allowed { valid_codes = append(valid_codes, byte(code)) break } } } return valid_codes }
func (encoder *HuffmanEncoder_V1) GenerateHuffman(data []byte, allowed_charset *charset.Charset) (huffman Huffman, err error) { var h Huffman var map_codes func(symbols []int, codes []byte, valid_codes []byte, allowed_charset *charset.Charset) ([]byte, error) map_codes = func(symbols []int, codes []byte, valid_codes []byte, allowed_charset *charset.Charset) ([]byte, error) { //fmt.Printf("W map_codes valid_codes = %v\n", valid_codes) symbols_length := len(symbols) codes_length := len(codes) valid_length := len(valid_codes) if symbols_length == codes_length { return codes, nil } if valid_length == 0 { return []byte{0}, errors.New("No result.") } prev_code := int(codes[codes_length-1]) prev_symbol := symbols[codes_length-1] symbol := symbols[codes_length] max_code_1 := prev_code + symbol - prev_symbol max_code_index := -(symbols_length - codes_length) % valid_length if max_code_index < 0 { max_code_index += valid_length } max_code_2 := int(valid_codes[max_code_index]) max_code := byte(0) if max_code_1 < max_code_2 { max_code = byte(max_code_1) } else { max_code = byte(max_code_2) } reachable_codes := make([]byte, 0, valid_length) for _, code := range valid_codes { if code <= max_code { reachable_codes = append(reachable_codes, code) } } if byte(symbol) == data[len(data)-1] { reachable_codes_temp := make([]byte, 0, valid_length) for _, code := range reachable_codes { lsb := utils.Binary(int(code), 8, true)[2:] byte_code := append(lsb, []byte{0, 0}...) 
byte_code_byte := utils.Bits_to_byte(byte_code) for _, allowed := range (*allowed_charset).Binary { if allowed == byte_code_byte { reachable_codes_temp = append(reachable_codes_temp, code) break } } reachable_codes = reachable_codes_temp } } //fmt.Printf("W reachable_codes = %v\n", reachable_codes) for i := len(reachable_codes) - 1; i >= 0; i-- { current_code := reachable_codes[i] next_codes := make([]byte, 0, valid_length) for _, valid := range valid_codes { if valid > current_code { next_codes = append(next_codes, valid) } } assigned_codes, err := map_codes(symbols, append(codes, current_code), next_codes, allowed_charset) if err == nil { return assigned_codes, nil } } return []byte{0}, errors.New("No result.") } fmt.Printf("GenerateHuffman(V1, data = %x)\n", data) min_allowed_code := (*encoder).min_allowed_code // Generate a sorted list of bytes in data sorted_bytes_set := make([]int, len(data)) for i, b := range data { sorted_bytes_set[i] = int(b) } sorted_bytes_set = utils.RemoveDuplicates(sorted_bytes_set) sort.Ints(sorted_bytes_set) assigned_codes, err := map_codes(append([]int{-1}, sorted_bytes_set...), []byte{byte(min_allowed_code - 1)}, (*encoder).valid_codes, allowed_charset) if err != nil { return h, err } assigned_codes = assigned_codes[1:] //fmt.Printf("W assigned_codes = %v\n", assigned_codes) symbols := make(map[byte]byte) for i := 0; i < len(sorted_bytes_set); i++ { symbols[byte(sorted_bytes_set[i])] = assigned_codes[i] } slack_2 := 0 slack_6 := 1 slack_8 := int(assigned_codes[0]) - min_allowed_code code_lengths := make([]byte, 0, len(assigned_codes)) for len(code_lengths) < 257 || slack_2 > 0 || slack_6 > 0 || slack_8 > 0 { if len(sorted_bytes_set) > 0 && len(code_lengths) == sorted_bytes_set[0] { code_lengths = append(code_lengths, 8) current_code := assigned_codes[0] assigned_codes = assigned_codes[1:] sorted_bytes_set = sorted_bytes_set[1:] if len(assigned_codes) > 0 { slack_8 = int(assigned_codes[0]) - int(current_code) - 1 } else { slack_8 = 
124 - utils.CountOccurrencies(code_lengths, 8) } } else if len(code_lengths) == 256 { code_lengths = append(code_lengths, 2) slack_2 = 1 } else if slack_8 > 0 { code_lengths = append(code_lengths, 8) slack_8-- } else if slack_6 > 0 { code_lengths = append(code_lengths, 6) slack_6-- } else if slack_2 > 0 { code_lengths = append(code_lengths, 2) slack_2-- } else { code_lengths = append(code_lengths, 0) } } // Check for HLIT extra_code_lengths := 257 - len(code_lengths) if extra_code_lengths < 16 && extra_code_lengths > 12 || extra_code_lengths > 28 { return h, errors.New("Invalid HLIT.") } // fmt.Printf("W code_lengths = %v\nW symbols = %v\n", code_lengths, symbols) // Populate h h.Code_lengths = code_lengths h.Symbols = symbols h.Trailer = 0 fmt.Println("Huffman found.") return h, nil }
func (d *ZlibStream) CompressVariant(block []byte, h *huffman.Huffman, is_last bool) { lenOfLen := []int{2, 4, 3, 4, 4, 5, 4, 4, 4, 0, 3, 5, 4} /* +------+--------+ | Code | Length | +------+--------+ | 16 | 2 | | 17 | 4 | | 18 | 3 | | 0 | 4 | | 8 | 4 | | 7 | 5 | | 9 | 4 | | 6 | 4 | | 10 | 4 | | 5 | - | | 11 | 3 | | 4 | 5 | | 12 | 4 | +------+--------+ */ code_lengths := (*h).Code_lengths symbols_map := (*h).Symbols encode := func(code []byte, n int) { //fmt.Printf("W encode(%v, %v)\n", code, n) first := true for n > 0 { if !first && n > 6 && d.ByteDisalignment() == 2 { x := n / 6 for i := 0; i < x; i++ { d.WriteBits([]byte{0, 0, 1, 1}) } n -= x * 6 } else { d.WriteBits(code) n-- } first = false } } //fmt.Printf("W BEGIN CompressVariant\n") // Header if is_last { d.WriteBits([]byte{1}) // BFINAL } else { d.WriteBits([]byte{0}) // BFINAL } d.WriteBits([]byte{0, 1}) // BTYPE = 10 - dynamic Huffman codes d.WriteBits(utils.Binary(len(code_lengths)-257, 5, false)) // HLIT d.WriteBits(utils.Binary(25, 5, false)) // HDIST d.WriteBits(utils.Binary(9, 4, false)) // HCLEN = len(lenOfLen) - 4 = 9 // Table // Lengths of lengths d.WriteLenOfLen(lenOfLen) // Encode runs_map := make([]Run, 0, len(code_lengths)) for _, code_length := range code_lengths { map_len := len(runs_map) if map_len > 0 && runs_map[map_len-1].code_length == code_length { runs_map[map_len-1].runs++ } else { var run Run run.code_length = code_length run.runs = 1 runs_map = append(runs_map, run) } } for _, run := range runs_map { switch run.code_length { case 0: encode([]byte{1, 0, 0, 0}, run.runs) case 6: encode([]byte{1, 0, 0, 1}, run.runs) case 8: encode([]byte{1, 0, 1, 0}, run.runs) } } // Distance if d.ByteDisalignment() == 2 { d.WriteBits([]byte{0, 1, 1}) // H 18 d.WriteBits(utils.Binary(11, 7, false)) // REPEAT ZERO 22x d.WriteBits([]byte{0, 0, 1, 0}) // H 16 + REPEAT 4x } else { d.WriteBits([]byte{1, 0, 0, 0}) // H 0 d.WriteBits([]byte{0, 1, 1}) // H 18 d.WriteBits(utils.Binary(10, 7, false)) // REPEAT 
ZERO 21x d.WriteBits([]byte{0, 0, 1, 0}) // H 16 + REPEAT 4x } // Data for _, b := range block { new_symbol := symbols_map[b] d.WriteBits(utils.Binary(int(new_symbol), 8, true)) } d.WriteBits(utils.Binary(int(h.Trailer), 6, true)) //fmt.Printf("W END CompressVariant\n") }
func (d *ZlibStream) WritePaddingBlock() { lenOfLen := []int{2, 5, 0, 4, 3, 0, 6, 4, 4, 4, 4, 6, 2} /* +------+--------+ | Code | Length | +------+--------+ | 16 | 2 | | 17 | 5 | | 18 | - | | 0 | 4 | | 8 | 3 | | 7 | - | | 9 | 6 | | 6 | 4 | | 10 | 4 | | 5 | 4 | | 11 | 4 | | 4 | 6 | | 12 | 2 | +------+--------+ */ encode := func(code []byte, n int) { first := true for n > 0 { if !first && n > 6 && d.ByteDisalignment() == 0 { x := 10 if n < 10 { x = n } d.WriteBits([]byte{0, 1}) d.WriteBits(utils.Binary(x-7, 2, false)) d.WriteBits([]byte{0, 1, 1, 0}) n -= x } else { d.WriteBits(code) n-- } first = false } } // Header d.WriteBits([]byte{0}) // BFINAL d.WriteBits([]byte{0, 1}) // BTYPE = 10 - dynamic Huffman codes d.WriteBits(utils.Binary(8, 5, false)) // HLIT d.WriteBits(utils.Binary(16, 5, false)) // HDIST d.WriteBits(utils.Binary(9, 4, false)) // HCLEN = len(lenOfLen) - 4 = 9 // Table // Lengths of lengths d.WriteLenOfLen(lenOfLen) // Literal + lengths encode([]byte{1, 0, 1, 0}, 197) encode([]byte{1, 1, 0, 0}, 64) // encode([]byte{1, 0, 1, 0}, 4) // (see below) // Distance // encode([]byte{1, 0, 1, 0}, 17) // (see below) // Specs allow to combine encode([]byte{1, 0, 1, 0}, 21) // End of block d.WriteBits([]byte{1, 1, 1, 0, 1, 1}) }
// WriteLenOfLen writes every entry of lengths, in slice order, as a 3-bit
// field produced by utils.Binary(length, 3, false).
func (d *ZlibStream) WriteLenOfLen(lengths []int) {
	for i := 0; i < len(lengths); i++ {
		field := utils.Binary(lengths[i], 3, false)
		d.WriteBits(field)
	}
}