func newFieldReader(parent *BlockTreeTermsReader, fieldInfo *FieldInfo, numTerms int64, rootCode []byte, sumTotalTermFreq, sumDocFreq int64, docCount int, indexStartFP int64, longsSize int, indexIn store.IndexInput, minTerm, maxTerm []byte) (r FieldReader, err error) { // log.Print("Initializing FieldReader...") assert(numTerms > 0) r = FieldReader{ parent: parent, fieldInfo: fieldInfo, numTerms: numTerms, sumTotalTermFreq: sumTotalTermFreq, sumDocFreq: sumDocFreq, docCount: docCount, indexStartFP: indexStartFP, rootCode: rootCode, longsSize: longsSize, minTerm: minTerm, maxTerm: maxTerm, } // log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=", // parent.segment, fieldInfo.Name, rootCode) in := store.NewByteArrayDataInput(rootCode) n, err := in.ReadVLong() if err != nil { return r, err } r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS) if indexIn != nil { clone := indexIn.Clone() // log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name) clone.Seek(indexStartFP) r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton()) } return r, err }
func newFieldReader(owner *BlockTreeTermsReader, fieldInfo model.FieldInfo, numTerms int64, rootCode []byte, sumTotalTermFreq, sumDocFreq int64, docCount int32, indexStartFP int64, indexIn store.IndexInput) (r FieldReader, err error) { log.Print("Initializing FieldReader...") if numTerms <= 0 { panic("assert fail") } // assert numTerms > 0 r = FieldReader{ BlockTreeTermsReader: owner, fieldInfo: fieldInfo, numTerms: numTerms, sumTotalTermFreq: sumTotalTermFreq, sumDocFreq: sumDocFreq, docCount: docCount, indexStartFP: indexStartFP, rootCode: rootCode, } log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=", owner.segment, fieldInfo.Name, rootCode) in := store.NewByteArrayDataInput(rootCode) n, err := in.ReadVLong() if err != nil { return r, err } r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS) if indexIn != nil { clone := indexIn.Clone() log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name) clone.Seek(indexStartFP) r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton()) } return r, err }
// VisitDocument locates docID's chunk in the compressed stored-fields file,
// decodes the per-document offset/length bookkeeping, decompresses the
// document's bytes, and feeds each stored field to visitor (honoring the
// visitor's YES/NO/STOP decision per field).
func (r *CompressingStoredFieldsReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	// Jump to the start of the chunk that contains docID.
	err := r.fieldsStream.Seek(r.indexReader.startPointer(docID))
	if err != nil {
		return err
	}
	// Chunk header: first doc id in the chunk and number of docs it holds.
	docBase, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkDocs, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	// Sanity-check that docID actually falls inside this chunk.
	if docID < docBase || docID >= docBase+chunkDocs || docBase+chunkDocs > r.numDocs {
		return errors.New(fmt.Sprintf(
			"Corrupted: docID=%v, docBase=%v, chunkDocs=%v, numDocs=%v (resource=%v)",
			docID, docBase, chunkDocs, r.numDocs, r.fieldsStream))
	}

	// offset/length: byte range of docID's serialized fields within the
	// chunk's decompressed payload; totalLength: payload size for the chunk.
	var numStoredFields, offset, length, totalLength int
	if chunkDocs == 1 {
		// Single-doc chunk: counts are stored directly, no packing.
		if numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		offset = 0
		if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		totalLength = length
	} else {
		// Multi-doc chunk: per-doc field counts are bit-packed; 0 bits means
		// every doc has the same count, stored once as a vInt.
		// NOTE: err here shadows the outer err for the rest of this branch.
		bitsPerStoredFields, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerStoredFields == 0 {
			numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt())
			if err != nil {
				return err
			}
		} else if bitsPerStoredFields > 31 {
			return errors.New(fmt.Sprintf("bitsPerStoredFields=%v (resource=%v)", bitsPerStoredFields, r.fieldsStream))
		} else {
			// Packed per-doc field counts not yet ported.
			panic("not implemented yet")
		}
		// Same encoding scheme for the per-doc serialized lengths.
		bitsPerLength, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerLength == 0 {
			// Uniform length: offset/total are simple multiples.
			if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
			offset = (docID - docBase) * length
			totalLength = chunkDocs * length
		} else if bitsPerLength > 31 {
			return errors.New(fmt.Sprintf("bitsPerLength=%v (resource=%v)", bitsPerLength, r.fieldsStream))
		} else {
			// Variable lengths: walk the packed stream, summing lengths of
			// the docs before docID (offset), docID's own length, and the
			// rest (to get totalLength for the whole chunk).
			it := packed.ReaderIteratorNoHeader(
				r.fieldsStream, packed.PackedFormat(packed.PACKED), r.packedIntsVersion, chunkDocs, bitsPerLength, 1)
			var n int64
			off := 0
			for i := 0; i < docID-docBase; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			offset = off
			if n, err = it.Next(); err != nil {
				return err
			}
			length = int(n)
			off += length
			for i := docID - docBase + 1; i < chunkDocs; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			totalLength = off
		}
	}
	// A doc has zero serialized bytes iff it has zero fields.
	if (length == 0) != (numStoredFields == 0) {
		return errors.New(fmt.Sprintf(
			"length=%v, numStoredFields=%v (resource=%v)",
			length, numStoredFields, r.fieldsStream))
	}
	if numStoredFields == 0 {
		// nothing to do
		return nil
	}

	var documentInput util.DataInput
	if r.version >= VERSION_BIG_CHUNKS && totalLength >= 2*r.chunkSize {
		// Streaming decompression of very large chunks not yet ported.
		panic("not implemented yet")
	} else {
		// Reuse the reader's scratch buffer for small chunks to avoid
		// allocating per call.
		var bytes []byte
		if totalLength <= BUFFER_REUSE_THRESHOLD {
			bytes = r.bytes
		} else {
			bytes = make([]byte, 0)
		}
		// decompressor yields only docID's slice [offset, offset+length)
		// of the chunk's payload.
		bytes, err = r.decompressor(r.fieldsStream, totalLength, offset, length, bytes)
		if err != nil {
			return err
		}
		assert(len(bytes) == length)
		documentInput = store.NewByteArrayDataInput(bytes)
	}

	for fieldIDX := 0; fieldIDX < numStoredFields; fieldIDX++ {
		// Each field starts with a vLong packing the field number (high
		// bits) and the value-type bits (low TYPE_BITS bits).
		infoAndBits, err := documentInput.ReadVLong()
		if err != nil {
			return err
		}
		fieldNumber := int(uint64(infoAndBits) >> uint64(TYPE_BITS))
		fieldInfo := r.fieldInfos.FieldInfoByNumber(fieldNumber)
		bits := int(infoAndBits & int64(TYPE_MASK))
		assertWithMessage(bits <= NUMERIC_DOUBLE, fmt.Sprintf("bits=%x", bits))
		status, err := visitor.NeedsField(fieldInfo)
		if err != nil {
			return err
		}
		switch status {
		case STORED_FIELD_VISITOR_STATUS_YES:
			// NOTE(review): readField's return value (if any) is discarded
			// here — confirm its signature and whether an error can be lost.
			r.readField(documentInput, visitor, fieldInfo, bits)
		case STORED_FIELD_VISITOR_STATUS_NO:
			// Skipping an unwanted field's bytes not yet ported.
			panic("not implemented yet")
		case STORED_FIELD_VISITOR_STATUS_STOP:
			return nil
		}
	}
	return nil
}
func (e *SegmentTermsEnum) printSeekState() { if e.currentFrame == e.staticFrame { log.Println(" no prior seek") } else { log.Println(" prior seek state:") ord := 0 isSeekFrame := true for { f := e.frame(ord) assert(f != nil) prefix := e.term.bytes[0:f.prefix] if f.nextEnt == -1 { action := "(next)" if isSeekFrame { action = "(seek)" } fpOrigValue := "" if f.isFloor { fpOrigValue = fmt.Sprintf(" (fpOrig=%v", f.fpOrig) } code := (f.fp << BTT_OUTPUT_FLAGS_NUM_BITS) if f.hasTerms { code += BTT_OUTPUT_FLAG_HAS_TERMS } if f.isFloor { code += BTT_OUTPUT_FLAG_IS_FLOOR } log.Printf(" frame %v ord=%v fp=%v%v prefixLen=%v prefix=%v hasTerms=%v isFloor=%v code=%v isLastInFloor=%v mdUpto=%v tbOrd=%v", action, ord, f.fp, fpOrigValue, f.prefix, prefix, f.hasTerms, f.isFloor, code, f.isLastInFloor, f.metaDataUpto, f.getTermBlockOrd()) } else { action := "(next, loaded)" if isSeekFrame { action = "(seek, loaded)" } fpOrigValue := "" if f.isFloor { fpOrigValue = fmt.Sprintf(" (fpOrig=%v", f.fpOrig) } code := (f.fp << BTT_OUTPUT_FLAGS_NUM_BITS) if f.hasTerms { code += BTT_OUTPUT_FLAG_HAS_TERMS } if f.isFloor { code += BTT_OUTPUT_FLAG_IS_FLOOR } log.Printf(" frame %v ord=%v fp=%v prefixLen=%v prefix=%v nextEnt=%v (of %v) hasTerms=%v isFloor=%v code=%v lastSubFP=%v isLastInFloor=%v mdUpto=%v tbOrd=%v", action, ord, f.fp, fpOrigValue, f.prefix, prefix, f.nextEnt, f.entCount, f.hasTerms, f.isFloor, code, f.lastSubFP, f.isLastInFloor, f.metaDataUpto, f.getTermBlockOrd()) } if e.index != nil { if isSeekFrame && f.arc == nil { log.Printf("isSeekFrame=%v f.arc=%v", isSeekFrame, f.arc) panic("assert fail") } ret, err := fst.GetFSTOutput(e.index, prefix) if err != nil { panic(err) } output := ret.([]byte) if output == nil { log.Println(" broken seek state: prefix is not final in index") panic("seek state is broken") } else if isSeekFrame && !f.isFloor { reader := store.NewByteArrayDataInput(output) codeOrig, _ := reader.ReadVLong() code := f.fp << BTT_OUTPUT_FLAGS_NUM_BITS if f.hasTerms { 
code += BTT_OUTPUT_FLAG_HAS_TERMS } if f.isFloor { code += BTT_OUTPUT_FLAG_IS_FLOOR } if codeOrig != code { log.Printf(" broken seek state: output code=%v doesn't match frame code=%v", codeOrig, code) panic("seek state is broken") } } } if f == e.currentFrame { break } if f.prefix == e.validIndexPrefix { isSeekFrame = false } ord++ } } }