func newCompressingStoredFieldsIndexReader(fieldsIndexIn store.IndexInput,
	si SegmentInfo) (r *CompressingStoredFieldsIndexReader, err error) {

	r = &CompressingStoredFieldsIndexReader{}
	r.maxDoc = int(si.docCount)
	r.docBases = make([]int, 0, 16)
	r.startPointers = make([]int64, 0, 16)
	r.avgChunkDocs = make([]int, 0, 16)
	r.avgChunkSizes = make([]int64, 0, 16)
	r.docBasesDeltas = make([]util.PackedIntsReader, 0, 16)
	r.startPointersDeltas = make([]util.PackedIntsReader, 0, 16)

	packedIntsVersion, err := fieldsIndexIn.ReadVInt()
	if err != nil {
		return nil, err
	}

	// The index is a sequence of blocks; a block announcing zero chunks
	// marks the end of the stream.
	for blockCount := 0; ; blockCount++ {
		numChunks, err := fieldsIndexIn.ReadVInt()
		if err != nil {
			return nil, err
		}
		if numChunks == 0 {
			break
		}

		{ // doc bases: block base, average docs per chunk, then packed per-chunk deltas
			n, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.docBases = append(r.docBases, int(n))
			n, err = fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.avgChunkDocs = append(r.avgChunkDocs, int(n))
			bitsPerDocBase, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerDocBase > 32 {
				return nil, fmt.Errorf("corrupted bitsPerDocBase (resource=%v)", fieldsIndexIn)
			}
			pr, err := util.NewPackedReaderNoHeader(fieldsIndexIn, util.PACKED,
				packedIntsVersion, numChunks, uint32(bitsPerDocBase))
			if err != nil {
				return nil, err
			}
			r.docBasesDeltas = append(r.docBasesDeltas, pr)
		}

		{ // start pointers: block base, average chunk size, then packed per-chunk deltas
			n, err := fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.startPointers = append(r.startPointers, n)
			n, err = fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.avgChunkSizes = append(r.avgChunkSizes, n)
			bitsPerStartPointer, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerStartPointer > 64 {
				return nil, fmt.Errorf("corrupted bitsPerStartPointer (resource=%v)", fieldsIndexIn)
			}
			pr, err := util.NewPackedReaderNoHeader(fieldsIndexIn, util.PACKED,
				packedIntsVersion, numChunks, uint32(bitsPerStartPointer))
			if err != nil {
				return nil, err
			}
			r.startPointersDeltas = append(r.startPointersDeltas, pr)
		}
	}

	return r, nil
}
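// The arrays decoded above are only half the story: at lookup time the
// reader combines each block's base and average with a per-chunk delta to
// recover an absolute value. Below is a minimal sketch of that
// reconstruction for start pointers. It assumes a Get(int) int64 method on
// util.PackedIntsReader and the zig-zag encoding Lucene applies to these
// deltas; the helper names (relativeStartPointerSketch, zigZagDecode) are
// illustrative and not part of the original source.
func (r *CompressingStoredFieldsIndexReader) relativeStartPointerSketch(block, chunk int) int64 {
	// start pointer of chunk `chunk` within `block` =
	//   block base + average chunk size * chunk index + signed per-chunk delta
	delta := zigZagDecode(r.startPointersDeltas[block].Get(chunk))
	return r.startPointers[block] + r.avgChunkSizes[block]*int64(chunk) + delta
}

// zigZagDecode maps an unsigned zig-zag-encoded value back to a signed one.
func zigZagDecode(n int64) int64 {
	return int64(uint64(n)>>1) ^ -(n & 1)
}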
func newBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos,
	info SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	// indexDivisor == -1 means the terms index is not loaded at all.
	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}
		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, fmt.Errorf("mismatched version files: %v=%v,%v=%v",
				fp.in, fp.version, indexIn, indexVersion)
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, fmt.Errorf("invalid numFields: %v (resource=%v)", numFields, fp.in)
	}

	for i := int32(0); i < numFields; i++ {
		log.Print("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)
		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}

		fieldInfo := fieldInfos.byNumber[field]
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.indexOptions == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)

		if docCount < 0 || docCount > info.docCount { // #docs with field must be <= #docs
			return fp, fmt.Errorf("invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.docCount, fp.in)
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, fmt.Errorf("invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in)
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, fmt.Errorf("invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in)
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)

		if _, ok := fp.fields[fieldInfo.name]; ok {
			return fp, fmt.Errorf("duplicate field: %v (resource=%v)", fieldInfo.name, fp.in)
		}

		fp.fields[fieldInfo.name], err = newFieldReader(fp, fieldInfo,
			numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
			indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true
	return fp, nil
}