Ejemplo n.º 1
0
// newFieldReader builds the FieldReader for one field of a block-tree terms
// dictionary.
//
// The supplied per-field statistics are copied into the reader, the root
// block file pointer is decoded from rootCode (a VLong shifted right by
// BTT_OUTPUT_FLAGS_NUM_BITS), and, when indexIn is non-nil, the field's FST
// index is loaded from a clone of indexIn positioned at indexStartFP.
//
// numTerms must be positive; this is enforced through assert. When decoding,
// seeking or loading fails, the reader built so far is returned together
// with the error and must not be used.
func newFieldReader(parent *BlockTreeTermsReader,
	fieldInfo *FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int,
	indexStartFP int64, longsSize int, indexIn store.IndexInput,
	minTerm, maxTerm []byte) (r FieldReader, err error) {

	// log.Print("Initializing FieldReader...")
	assert(numTerms > 0)
	r = FieldReader{
		parent:           parent,
		fieldInfo:        fieldInfo,
		numTerms:         numTerms,
		sumTotalTermFreq: sumTotalTermFreq,
		sumDocFreq:       sumDocFreq,
		docCount:         docCount,
		indexStartFP:     indexStartFP,
		rootCode:         rootCode,
		longsSize:        longsSize,
		minTerm:          minTerm,
		maxTerm:          maxTerm,
	}
	// log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
	// 	parent.segment, fieldInfo.Name, rootCode)

	// The root code packs the root block's file pointer above the flag bits;
	// the unsigned conversion keeps the shift logical rather than arithmetic.
	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		// Work on a clone so the caller's input keeps its own position.
		clone := indexIn.Clone()
		// log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		// A failed seek must stop here: loading the FST from an arbitrary
		// position would either fail obscurely or yield a corrupt index.
		if err = clone.Seek(indexStartFP); err != nil {
			return r, err
		}
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
Ejemplo n.º 2
0
// newFieldReader builds the FieldReader for one field of a block-tree terms
// dictionary, logging its progress along the way.
//
// The supplied per-field statistics are copied into the reader, the root
// block file pointer is decoded from rootCode (a VLong shifted right by
// BTT_OUTPUT_FLAGS_NUM_BITS), and, when indexIn is non-nil, the field's FST
// index is loaded from a clone of indexIn positioned at indexStartFP.
//
// It panics when numTerms is not positive. When decoding, seeking or loading
// fails, the reader built so far is returned together with the error and
// must not be used.
func newFieldReader(owner *BlockTreeTermsReader,
	fieldInfo model.FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int32, indexStartFP int64,
	indexIn store.IndexInput) (r FieldReader, err error) {
	log.Print("Initializing FieldReader...")
	// assert numTerms > 0
	if numTerms <= 0 {
		panic("assert fail")
	}
	r = FieldReader{
		BlockTreeTermsReader: owner,
		fieldInfo:            fieldInfo,
		numTerms:             numTerms,
		sumTotalTermFreq:     sumTotalTermFreq,
		sumDocFreq:           sumDocFreq,
		docCount:             docCount,
		indexStartFP:         indexStartFP,
		rootCode:             rootCode,
	}
	log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
		owner.segment, fieldInfo.Name, rootCode)

	// The root code packs the root block's file pointer above the flag bits;
	// the unsigned conversion keeps the shift logical rather than arithmetic.
	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		// Work on a clone so the caller's input keeps its own position.
		clone := indexIn.Clone()
		log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		// A failed seek must stop here: loading the FST from an arbitrary
		// position would either fail obscurely or yield a corrupt index.
		if err = clone.Seek(indexStartFP); err != nil {
			return r, err
		}
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
Ejemplo n.º 3
0
// VisitDocument feeds the stored fields of document docID to visitor.
//
// The fields stream is positioned at the start pointer the index reader
// reports for docID. The chunk header is then read (first document of the
// chunk, number of documents, stored-field count and per-document lengths),
// the bytes belonging to docID are decompressed, and every encoded field is
// offered to the visitor through NeedsField.
//
// It returns any error raised while seeking, reading or decompressing, a
// descriptive error when the chunk header is inconsistent, and nil otherwise
// (including when the visitor answers STORED_FIELD_VISITOR_STATUS_STOP).
//
// Three paths are not implemented yet and panic: a bitsPerStoredFields value
// between 1 and 31, a chunk of at least 2*chunkSize bytes when the version is
// VERSION_BIG_CHUNKS or later, and a visitor answering
// STORED_FIELD_VISITOR_STATUS_NO.
func (r *CompressingStoredFieldsReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	if err := r.fieldsStream.Seek(r.indexReader.startPointer(docID)); err != nil {
		return err
	}

	// Chunk header: first document in the chunk, then the document count.
	docBase, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkDocs, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkEnd := docBase + chunkDocs
	if docID < docBase || docID >= chunkEnd || chunkEnd > r.numDocs {
		return errors.New(fmt.Sprintf(
			"Corrupted: docID=%v, docBase=%v, chunkDocs=%v, numDocs=%v (resource=%v)",
			docID, docBase, chunkDocs, r.numDocs, r.fieldsStream))
	}

	var (
		numStoredFields int // number of stored fields in docID
		offset          int // start of docID's bytes inside the chunk
		length          int // number of bytes belonging to docID
		totalLength     int // number of bytes in the whole chunk
	)
	if chunkDocs != 1 {
		// Multi-document chunk: the field count and the lengths are each
		// preceded by a bit width; zero means "one value shared by all docs".
		fieldCountBits, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		switch {
		case fieldCountBits == 0:
			if numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
		case fieldCountBits > 31:
			return errors.New(fmt.Sprintf("bitsPerStoredFields=%v (resource=%v)",
				fieldCountBits, r.fieldsStream))
		default:
			panic("not implemented yet")
		}

		lengthBits, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		switch {
		case lengthBits == 0:
			if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
			offset = (docID - docBase) * length
			totalLength = chunkDocs * length
		case lengthBits > 31:
			return errors.New(fmt.Sprintf("bitsPerLength=%v (resource=%v)",
				lengthBits, r.fieldsStream))
		default:
			// One packed length per document: a single pass accumulates the
			// running total and captures offset/length when it reaches docID.
			lengths := packed.ReaderIteratorNoHeader(
				r.fieldsStream, packed.PackedFormat(packed.PACKED), r.packedIntsVersion,
				chunkDocs, lengthBits, 1)
			target := docID - docBase
			sum := 0
			for i := 0; i < chunkDocs; i++ {
				v, err := lengths.Next()
				if err != nil {
					return err
				}
				if i == target {
					offset = sum
					length = int(v)
				}
				sum += int(v)
			}
			totalLength = sum
		}
	} else {
		// Single-document chunk: count and length are stored directly.
		numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		length, err = int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		offset, totalLength = 0, length
	}

	// A document has no bytes exactly when it has no stored fields.
	if (numStoredFields == 0) != (length == 0) {
		return errors.New(fmt.Sprintf(
			"length=%v, numStoredFields=%v (resource=%v)",
			length, numStoredFields, r.fieldsStream))
	}
	if numStoredFields == 0 {
		// nothing to do
		return nil
	}

	if r.version >= VERSION_BIG_CHUNKS && totalLength >= 2*r.chunkSize {
		panic("not implemented yet")
	}

	// Hand the reader's own buffer to the decompressor for small chunks and
	// a fresh empty one otherwise.
	buf := make([]byte, 0)
	if totalLength <= BUFFER_REUSE_THRESHOLD {
		buf = r.bytes
	}
	buf, err = r.decompressor(r.fieldsStream, totalLength, offset, length, buf)
	if err != nil {
		return err
	}
	assert(len(buf) == length)
	var documentInput util.DataInput = store.NewByteArrayDataInput(buf)

	for remaining := numStoredFields; remaining > 0; remaining-- {
		// Each field starts with a VLong holding the field number in the
		// high bits and the value type in the low TYPE_BITS bits.
		infoAndBits, err := documentInput.ReadVLong()
		if err != nil {
			return err
		}
		fieldInfo := r.fieldInfos.FieldInfoByNumber(int(uint64(infoAndBits) >> uint64(TYPE_BITS)))

		bits := int(infoAndBits & int64(TYPE_MASK))
		assertWithMessage(bits <= NUMERIC_DOUBLE, fmt.Sprintf("bits=%x", bits))

		status, err := visitor.NeedsField(fieldInfo)
		if err != nil {
			return err
		}
		switch status {
		case STORED_FIELD_VISITOR_STATUS_STOP:
			return nil
		case STORED_FIELD_VISITOR_STATUS_NO:
			panic("not implemented yet")
		case STORED_FIELD_VISITOR_STATUS_YES:
			// NOTE(review): if readField returns an error it is discarded
			// here — confirm its signature and propagate if so.
			r.readField(documentInput, visitor, fieldInfo, bits)
		}
	}

	return nil
}
Ejemplo n.º 4
0
// printSeekState logs the enum's current seek state for debugging.
//
// When the current frame is the static frame there is no prior seek and only
// a short notice is logged. Otherwise one line is logged for every frame from
// ordinal 0 up to and including the current frame. If the enum has a terms
// index, each frame is also cross-checked against it: the frame's prefix must
// have an output in the index, seek frames must carry an arc, and for
// non-floor seek frames the code stored in the index must equal the code
// rebuilt from the frame. Any violation, or any error while consulting the
// index, ends in a panic.
func (e *SegmentTermsEnum) printSeekState() {
	if e.currentFrame == e.staticFrame {
		log.Println("  no prior seek")
		return
	}

	log.Println("  prior seek state:")
	// Frames count as seek frames until one whose prefix length equals
	// validIndexPrefix has been processed; later ones are "next" frames.
	isSeekFrame := true
	for ord := 0; ; ord++ {
		f := e.frame(ord)
		assert(f != nil)
		prefix := e.term.bytes[0:f.prefix]

		fpOrigValue := ""
		if f.isFloor {
			fpOrigValue = fmt.Sprintf(" (fpOrig=%v)", f.fpOrig)
		}
		// Rebuild the block code: file pointer above the flag bits.
		code := f.fp << BTT_OUTPUT_FLAGS_NUM_BITS
		if f.hasTerms {
			code += BTT_OUTPUT_FLAG_HAS_TERMS
		}
		if f.isFloor {
			code += BTT_OUTPUT_FLAG_IS_FLOOR
		}

		if f.nextEnt == -1 {
			action := "(next)"
			if isSeekFrame {
				action = "(seek)"
			}
			log.Printf("    frame %v ord=%v fp=%v%v prefixLen=%v prefix=%v hasTerms=%v isFloor=%v code=%v isLastInFloor=%v mdUpto=%v tbOrd=%v",
				action, ord, f.fp, fpOrigValue, f.prefix, prefix, f.hasTerms, f.isFloor, code, f.isLastInFloor, f.metaDataUpto, f.getTermBlockOrd())
		} else {
			action := "(next, loaded)"
			if isSeekFrame {
				action = "(seek, loaded)"
			}
			// fpOrigValue needs its own verb right after fp; without it every
			// following label is paired with the wrong argument.
			log.Printf("    frame %v ord=%v fp=%v%v prefixLen=%v prefix=%v nextEnt=%v (of %v) hasTerms=%v isFloor=%v code=%v lastSubFP=%v isLastInFloor=%v mdUpto=%v tbOrd=%v",
				action, ord, f.fp, fpOrigValue, f.prefix, prefix, f.nextEnt, f.entCount, f.hasTerms, f.isFloor, code, f.lastSubFP, f.isLastInFloor, f.metaDataUpto, f.getTermBlockOrd())
		}

		if e.index != nil {
			if isSeekFrame && f.arc == nil {
				log.Printf("isSeekFrame=%v f.arc=%v", isSeekFrame, f.arc)
				panic("assert fail")
			}
			ret, err := fst.GetFSTOutput(e.index, prefix)
			if err != nil {
				panic(err)
			}
			// Comma-ok form: a nil (or non-[]byte) result must reach the
			// diagnostic below instead of dying inside the type assertion.
			output, _ := ret.([]byte)
			if output == nil {
				log.Println("      broken seek state: prefix is not final in index")
				panic("seek state is broken")
			}
			if isSeekFrame && !f.isFloor {
				reader := store.NewByteArrayDataInput(output)
				codeOrig, err := reader.ReadVLong()
				if err != nil {
					panic(err)
				}
				if codeOrig != code {
					log.Printf("      broken seek state: output code=%v doesn't match frame code=%v", codeOrig, code)
					panic("seek state is broken")
				}
			}
		}

		if f == e.currentFrame {
			break
		}
		if f.prefix == e.validIndexPrefix {
			isSeekFrame = false
		}
	}
}