// ReadBool func ReadBool(r io.Reader, count uint) ([]bool, error) { var out []bool bitWidth := uint(1) // fixed for booleans byteWidth := (bitWidth + uint(7)) / uint(8) p := make([]byte, byteWidth) br := bufio.NewReader(r) for { // run := <bit-packed-run> | <rle-run> header, err := ReadVarint32(br) if err == io.EOF { break } else if err != nil { return nil, err } if (header & 1) == 1 { // bit-packed-header := varint-encode(<bit-pack-count> << 1 | 1) // we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8 // bit-pack-count := (number of values in this run) / 8 literalCount := (header >> 1) * 8 if uint(literalCount) > ((count - uint(len(out))) + 7) { return nil, fmt.Errorf("bitcoding.bool:bad encoding found more elements (%d) than expected (%d)", uint(len(out))+uint(literalCount), count) } r := bitpacking.NewDecoder(bitWidth) values := make([]int32, literalCount) if err := r.Read(br, values); err != nil { return nil, err } for i := int32(0); i < literalCount; i++ { out = append(out, values[i] == 1) } } else { // rle-run := <rle-header> <repeated-value> // rle-header := varint-encode( (number of times repeated) << 1) // repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width) repeatCount := int32(header >> 1) if _, err := br.Read(p); err != nil { return nil, fmt.Errorf("short read value: %s", err) } value := unpackLittleEndianInt32(p) if uint(repeatCount) > (count - uint(len(out))) { return nil, fmt.Errorf("rle.bool:bad encoding: found more elements (%d) than expected (%d)", uint(len(out))+uint(repeatCount), count) } for i := int32(0); i < repeatCount; i++ { out = append(out, value == 1) } } } if uint(len(out)) < count { return nil, fmt.Errorf("could not decode %d values only %d", count, len(out)) } return out[:count], nil }
func (p *DataPage) readDefinitionAndRepetitionLevels(rb *bufio.Reader) (repetition []uint64, defintion []uint64, err error) { // Repetition Levels // only levels that are repeated need a Repetition level: // optional or required fields are never repeated // and can be skipped while attributing repetition levels. if p.schema.GetRepetitionType() == thrift.FieldRepetitionType_REPEATED { repEnc := p.header.GetRepetitionLevelEncoding() switch repEnc { case thrift.Encoding_BIT_PACKED: dec := bitpacking.NewDecoder(1) runs, err := dec.ReadLength(rb) if err != nil { return nil, nil, fmt.Errorf("bitpacking.ReadLength:%s", err) } out := make([]int32, min(uint(p.header.GetNumValues()), runs*8)) if err := dec.Read(rb, out); err != nil { return nil, nil, fmt.Errorf("bitpacking cannot read:%s", err) } log.Println("WARNING GOT REPETITION:", len(out), p.header.GetNumValues()) // result := make([]int32, 0, int(runs*8)) // finish: // for i := 0; i < int(runs); i++ { // if err := dec.Read(rb, out); err != nil { // return nil, nil, fmt.Errorf("bitpacking cannot read after %d blocks:%s", i, err) // } // for j := 0; j < 8; j++ { // if len(result)+1 > int(p.header.GetNumValues()) { // break finish // } // result = append(result, out[j]) // } // } default: return nil, nil, fmt.Errorf("WARNING could not handle %s", repEnc) } } // Definition Levels // For data that is required, the definition levels are skipped. // If encoded, it will always have the value of the max definition level. if p.schema.GetRepetitionType() != thrift.FieldRepetitionType_REQUIRED { defEnc := p.header.GetDefinitionLevelEncoding() switch defEnc { case thrift.Encoding_RLE: p.maxDefinitionLevels = 0 // length of the <encoded-data> in bytes stored as 4 bytes little endian var length uint32 if err := binary.Read(rb, binary.LittleEndian, &length); err != nil { return nil, nil, err } lr := io.LimitReader(rb, int64(length)) values, err := rle.ReadBool(lr, uint(p.header.GetNumValues())) if err != nil { return nil, nil, err } p.DefinitionLevels = values if n, _ := io.Copy(ioutil.Discard, lr); n > 0 { log.Println("WARNING not all data was consumed in RLE encoder") } default: return nil, nil, fmt.Errorf("WARNING could not handle %s", defEnc) } } return []uint64{}, []uint64{}, nil }