Example #1
0
// ReadBool decodes count boolean values from r. The input is a sequence of
// runs, each introduced by a varint header whose lowest bit selects between a
// bit-packed run and an RLE run (see the grammar fragments in the body). The
// bit width is fixed at 1.
//
// Runs are read until the header read reports io.EOF. Before a run is
// expanded its length is checked against the number of values still missing:
// an RLE run may not exceed it, and a bit-packed run may exceed it by at most
// 7 values. Surplus values are dropped from the result.
//
// It returns exactly count values. An error is returned when a header or run
// cannot be read, when a run announces more values than allowed, or when the
// input ends before count values were decoded.
func ReadBool(r io.Reader, count uint) ([]bool, error) {
	var out []bool
	bitWidth := uint(1) // fixed for booleans
	byteWidth := (bitWidth + uint(7)) / uint(8)
	p := make([]byte, byteWidth)

	br := bufio.NewReader(r)

	for {
		// run := <bit-packed-run> | <rle-run>
		header, err := ReadVarint32(br)
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}

		// Values still missing. A previous bit-packed run may already have
		// pushed len(out) past count, so saturate at zero instead of letting
		// the unsigned subtraction wrap around and disable the checks below.
		var remaining uint
		if decoded := uint(len(out)); decoded < count {
			remaining = count - decoded
		}

		if (header & 1) == 1 {
			// bit-packed-header := varint-encode(<bit-pack-count> << 1 | 1)
			// we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8
			// bit-pack-count := (number of values in this run) / 8
			literalCount := (header >> 1) * 8

			// Allow up to 7 surplus values; written as a subtraction so the
			// comparison cannot overflow.
			if n := uint(literalCount); n > remaining && n-remaining > 7 {
				return nil, fmt.Errorf("bitcoding.bool:bad encoding found more elements (%d) than expected (%d)", uint(len(out))+uint(literalCount), count)
			}

			dec := bitpacking.NewDecoder(bitWidth)

			values := make([]int32, literalCount)
			if err := dec.Read(br, values); err != nil {
				return nil, err
			}

			for _, v := range values {
				out = append(out, v == 1)
			}
		} else {
			// rle-run := <rle-header> <repeated-value>
			// rle-header := varint-encode( (number of times repeated) << 1)
			// repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width)
			repeatCount := int32(header >> 1)

			// ReadFull guarantees p is completely filled or an error is
			// reported; a plain Read gives no such guarantee.
			if _, err := io.ReadFull(br, p); err != nil {
				return nil, fmt.Errorf("short read value: %s", err)
			}
			value := unpackLittleEndianInt32(p)

			if uint(repeatCount) > remaining {
				return nil, fmt.Errorf("rle.bool:bad encoding: found more elements (%d) than expected (%d)", uint(len(out))+uint(repeatCount), count)
			}

			for i := int32(0); i < repeatCount; i++ {
				out = append(out, value == 1)
			}
		}
	}

	if uint(len(out)) < count {
		return nil, fmt.Errorf("could not decode %d values only %d", count, len(out))
	}

	// Drop any surplus values contributed by the last bit-packed run.
	return out[:count], nil
}
Example #2
0
// readDefinitionAndRepetitionLevels reads the repetition-level and
// definition-level sections of the page from rb.
//
// Repetition levels are read only when the schema's repetition type is
// REPEATED, and only the BIT_PACKED encoding is handled. Definition levels are
// read unless the repetition type is REQUIRED, and only the RLE encoding is
// handled; the decoded values are stored in p.DefinitionLevels and
// p.maxDefinitionLevels is reset to 0.
//
// On success both returned slices are empty (never populated by this method).
// An error is returned for an unsupported encoding or any read failure.
func (p *DataPage) readDefinitionAndRepetitionLevels(rb *bufio.Reader) (repetition []uint64, defintion []uint64, err error) {

	// Repetition Levels
	// only levels that are repeated need a Repetition level:
	// optional or required fields are never repeated
	// and can be skipped while attributing repetition levels.
	if p.schema.GetRepetitionType() == thrift.FieldRepetitionType_REPEATED {
		repEnc := p.header.GetRepetitionLevelEncoding()
		switch repEnc {
		case thrift.Encoding_BIT_PACKED:
			dec := bitpacking.NewDecoder(1)
			runs, err := dec.ReadLength(rb)
			if err != nil {
				return nil, nil, fmt.Errorf("bitpacking.ReadLength:%s", err)
			}
			// Decode at most NumValues entries, capped at runs*8.
			out := make([]int32, min(uint(p.header.GetNumValues()), runs*8))
			if err := dec.Read(rb, out); err != nil {
				return nil, nil, fmt.Errorf("bitpacking cannot read:%s", err)
			}

			// NOTE(review): the decoded repetition levels are only logged
			// here and then discarded; nothing is stored or returned.
			log.Println("WARNING GOT REPETITION:", len(out), p.header.GetNumValues())

		default:
			return nil, nil, fmt.Errorf("WARNING could not handle %s", repEnc)
		}
	}

	// Definition Levels
	// For data that is required, the definition levels are skipped.
	// If encoded, it will always have the value of the max definition level.
	if p.schema.GetRepetitionType() != thrift.FieldRepetitionType_REQUIRED {
		defEnc := p.header.GetDefinitionLevelEncoding()
		switch defEnc {
		case thrift.Encoding_RLE:
			p.maxDefinitionLevels = 0
			// length of the <encoded-data> in bytes stored as 4 bytes little endian
			var length uint32

			if err := binary.Read(rb, binary.LittleEndian, &length); err != nil {
				return nil, nil, err
			}

			// Confine the decoder to the announced section length so it
			// cannot read past the section.
			lr := io.LimitReader(rb, int64(length))

			values, err := rle.ReadBool(lr, uint(p.header.GetNumValues()))
			if err != nil {
				return nil, nil, err
			}

			p.DefinitionLevels = values

			// Skip whatever the decoder left unread. A failure here leaves
			// rb at an unknown offset inside the section, so it is reported
			// instead of being ignored.
			n, err := io.Copy(ioutil.Discard, lr)
			if err != nil {
				return nil, nil, err
			}
			if n > 0 {
				log.Println("WARNING not all data was consumed in RLE encoder")
			}

		default:
			return nil, nil, fmt.Errorf("WARNING could not handle %s", defEnc)
		}
	}

	return []uint64{}, []uint64{}, nil
}