示例#1
0
// Scan reads the next page of the column chunk from s.r and stores the decoded
// result on the scanner. It returns true when a page was read and fully
// consumed, and false when the page header could not be read (including end of
// input) or when any later step failed. On failure the error is handed to
// s.setErr; end of input is reported as io.EOF.
//
// Every call that gets past the initial error check first resets s.dictionary,
// s.dataPage and s.indexPage to nil before reading the next page.
func (s *scanner) Scan() bool {
	// Once an error has been recorded in s.err, refuse to read any further.
	if s.err != nil {
		return false
	}

	// Forget the page produced by the previous call.
	s.dictionary = nil
	s.dataPage = nil
	s.indexPage = nil

	var header thrift.PageHeader
	err := header.Read(s.r)
	if err != nil {
		// FIXME: find a better way to detect io.EOF than matching the message.
		// NOTE(review): this suffix also matches messages ending in
		// "unexpected EOF", so a truncated header is reported as a clean
		// io.EOF — confirm that this is intended.
		if strings.HasSuffix(err.Error(), "EOF") {
			s.setErr(io.EOF)
			return false
		}
		s.setErr(fmt.Errorf("column scanner: could not read chunk header: %s", err))
		return false
	}

	// Restrict reads to the compressed size announced by the header, then let
	// compressionReader wrap that bounded reader for this page.
	r := io.LimitReader(s.r, int64(header.CompressedPageSize))
	r, err = s.compressionReader(r, &header)
	if err != nil {
		s.setErr(err)
		return false
	}

	// Decode the page body.
	if err = s.readPage(r, &header); err != nil {
		s.setErr(err)
		return false
	}

	// Safeguard: the page decoder must have consumed everything. Drain what is
	// left and count it. io.Copy reports a clean end of stream as a nil error
	// (never io.EOF), so any error returned here is a genuine read failure.
	leftover, err := io.Copy(ioutil.Discard, r)
	if err != nil {
		s.setErr(err)
		return false
	}
	if leftover > 0 {
		s.setErr(fmt.Errorf("not all the data was consumed for page %s", header.GetType()))
		return false
	}

	return true
}
示例#2
0
// readPage decodes the body of one page from r, dispatching on the page type
// declared in header, and stores the resulting page object on the scanner.
//
// Index pages only get their header wrapped in s.indexPage (the body is not
// read yet); an index page without its header is silently ignored. Dictionary
// pages are decoded into s.dictionary and data pages are read into s.dataPage;
// both return an error when the matching header struct is not set. For data
// pages the header's value count is added to s.totalRead before the body is
// read. DATA_PAGE_V2 and unknown page types yield an error.
func (s *scanner) readPage(r io.Reader, header *thrift.PageHeader) error {
	switch header.GetType() {

	case thrift.PageType_INDEX_PAGE:
		if !header.IsSetIndexPageHeader() {
			return nil
		}

		s.indexPage = NewIndexPage(header.GetIndexPageHeader())
		// TODO read indexPage
		return nil

	case thrift.PageType_DICTIONARY_PAGE:
		if !header.IsSetDictionaryPageHeader() {
			return fmt.Errorf("bad file format:DictionaryPageHeader flag was not set")
		}
		dictHeader := header.GetDictionaryPageHeader()
		s.dictionary = NewDictionaryPage(s.schema, dictHeader)
		return s.dictionary.Decode(r)

	case thrift.PageType_DATA_PAGE_V2:
		// Not implemented yet. Report it as an error instead of panicking so
		// that a file containing V2 pages cannot crash the calling process.
		return fmt.Errorf("column scanner: page type %s is not yet implemented", header.GetType())

	case thrift.PageType_DATA_PAGE:
		// Validate before touching the data page header: the value count must
		// not be read (or added to the running total) when the header is absent.
		if !header.IsSetDataPageHeader() {
			return fmt.Errorf("bad file format: DataPageHeader flag was not set")
		}
		dataHeader := header.GetDataPageHeader()
		s.totalRead += int(dataHeader.GetNumValues())

		s.dataPage = NewDataPage(s.schema, dataHeader)
		return s.dataPage.ReadAll(r)

	default:
		return fmt.Errorf("unknown PageHeader.PageType: %s", header.GetType())
	}
}