// Scan reads the next page inside the column chunk. returns false if no more data pages // are present or if an error occurred. func (s *scanner) Scan() bool { var ( header thrift.PageHeader ) if s.err != nil { return false } s.dictionary = nil s.dataPage = nil s.indexPage = nil err := header.Read(s.r) if err != nil { if strings.HasSuffix(err.Error(), "EOF") { // FIXME: find a better way to detect io.EOF s.setErr(io.EOF) return false } s.setErr(fmt.Errorf("column scanner: could not read chunk header: %s", err)) return false } // setup reader r := io.LimitReader(s.r, int64(header.CompressedPageSize)) r, err = s.compressionReader(r, &header) if err != nil { s.setErr(err) return false } // read the page if err := s.readPage(r, &header); err != nil { s.setErr(err) return false } // check if we consumed all the data from the limit reader as a safe guard if n, err := io.Copy(ioutil.Discard, r); err != nil { if err == io.EOF { return true } s.setErr(err) return false } else if n > 0 { err := fmt.Errorf("not all the data was consumed for page %s", header.GetType()) s.setErr(err) return false } return true }
func (s *scanner) readPage(r io.Reader, header *thrift.PageHeader) error { switch header.GetType() { case thrift.PageType_INDEX_PAGE: if !header.IsSetIndexPageHeader() { return nil } s.indexPage = NewIndexPage(header.GetIndexPageHeader()) // TODO read indexPage return nil case thrift.PageType_DICTIONARY_PAGE: if !header.IsSetDictionaryPageHeader() { return fmt.Errorf("bad file format:DictionaryPageHeader flag was not set") } dictHeader := header.GetDictionaryPageHeader() s.dictionary = NewDictionaryPage(s.schema, dictHeader) return s.dictionary.Decode(r) case thrift.PageType_DATA_PAGE_V2: panic("nyi") case thrift.PageType_DATA_PAGE: s.totalRead += int(header.GetDataPageHeader().GetNumValues()) if !header.IsSetDataPageHeader() { return fmt.Errorf("bad file format: DataPageHeader flag was not set") } s.dataPage = NewDataPage(s.schema, header.GetDataPageHeader()) return s.dataPage.ReadAll(r) default: return fmt.Errorf("unknown PageHeader.PageType: %s", header.GetType()) } }