Пример #1
0
func newSegmentTermsEnum(r *FieldReader) *SegmentTermsEnum {
	ans := &SegmentTermsEnum{
		FieldReader:   r,
		stack:         make([]*segmentTermsEnumFrame, 0),
		scratchReader: store.NewEmptyByteArrayDataInput(),
		term:          newBytesRef(),
		arcs:          make([]*fst.Arc, 1),
		fstOutputs:    fst.ByteSequenceOutputsSingleton(),
	}
	ans.TermsEnumImpl = newTermsEnumImpl(ans)
	log.Printf("BTTR.init seg=%v", r.segment)

	// Used to hold seek by TermState, or cached seek
	ans.staticFrame = newFrame(ans, -1)

	if r.index != nil {
		ans.fstReader = r.index.BytesReader()
	}

	// Init w/ root block; don't use index since it may
	// not (and need not) have been loaded
	for i, _ := range ans.arcs {
		ans.arcs[i] = &fst.Arc{}
	}

	ans.currentFrame = ans.staticFrame
	var arc *fst.Arc
	if r.index != nil {
		arc = r.index.FirstArc(ans.arcs[0])
		// Empty string prefix must have an output in the index!
		if !arc.IsFinal() {
			panic("assert fail")
		}
	}
	ans.currentFrame = ans.staticFrame
	ans.validIndexPrefix = 0
	log.Printf("init frame state %v", ans.currentFrame.ord)
	ans.printSeekState()

	// ans.computeBlockStats()

	return ans
}
Пример #2
0
func newFieldReader(parent *BlockTreeTermsReader,
	fieldInfo *FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int,
	indexStartFP int64, longsSize int, indexIn store.IndexInput,
	minTerm, maxTerm []byte) (r FieldReader, err error) {

	// log.Print("Initializing FieldReader...")
	assert(numTerms > 0)
	r = FieldReader{
		parent:           parent,
		fieldInfo:        fieldInfo,
		numTerms:         numTerms,
		sumTotalTermFreq: sumTotalTermFreq,
		sumDocFreq:       sumDocFreq,
		docCount:         docCount,
		indexStartFP:     indexStartFP,
		rootCode:         rootCode,
		longsSize:        longsSize,
		minTerm:          minTerm,
		maxTerm:          maxTerm,
	}
	// log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
	// 	parent.segment, fieldInfo.Name, rootCode)

	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		clone := indexIn.Clone()
		// log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		clone.Seek(indexStartFP)
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
Пример #3
0
func newFieldReader(owner *BlockTreeTermsReader,
	fieldInfo model.FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int32, indexStartFP int64,
	indexIn store.IndexInput) (r FieldReader, err error) {
	log.Print("Initializing FieldReader...")
	if numTerms <= 0 {
		panic("assert fail")
	}
	// assert numTerms > 0
	r = FieldReader{
		BlockTreeTermsReader: owner,
		fieldInfo:            fieldInfo,
		numTerms:             numTerms,
		sumTotalTermFreq:     sumTotalTermFreq,
		sumDocFreq:           sumDocFreq,
		docCount:             docCount,
		indexStartFP:         indexStartFP,
		rootCode:             rootCode,
	}
	log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
		owner.segment, fieldInfo.Name, rootCode)

	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		clone := indexIn.Clone()
		log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		clone.Seek(indexStartFP)
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
Пример #4
0
func (b *PendingBlock) compileIndex(blocks []*PendingBlock,
	scratchBytes *store.RAMOutputStream,
	scratchIntsRef *util.IntsRefBuilder) (err error) {

	assert2(b.isFloor && len(blocks) > 1 || (!b.isFloor && len(blocks) == 1),
		"isFloor=%v blocks=%v", b.isFloor, blocks)
	assert(blocks[0] == b)

	assert(scratchBytes.FilePointer() == 0)

	// TODO: try writing the leading vLong in MSB order
	// (opposite of what Lucene does today), for better
	// outputs sharing in the FST
	if err = scratchBytes.WriteVLong(encodeOutput(b.fp, b.hasTerms, b.isFloor)); err != nil {
		return
	}
	if b.isFloor {
		if err = scratchBytes.WriteVInt(int32(len(blocks) - 1)); err != nil {
			return
		}
		for _, sub := range blocks[1:] {
			assert(sub.floorLeadByte != -1)
			// fmt.Printf("    write floorLeadByte=%v\n", util.ItoHex(int64(sub.floorLeadByte)))
			if err = scratchBytes.WriteByte(byte(sub.floorLeadByte)); err != nil {
				return
			}
			assert(sub.fp > b.fp)
			if err = scratchBytes.WriteVLong((sub.fp-b.fp)<<1 |
				int64(map[bool]int{true: 1, false: 0}[sub.hasTerms])); err != nil {
				return
			}
		}
	}

	outputs := fst.ByteSequenceOutputsSingleton()
	indexBuilder := fst.NewBuilder(fst.INPUT_TYPE_BYTE1,
		0, 0, true, false, int(math.MaxInt32),
		outputs, false,
		packed.PackedInts.COMPACT, true, 15)

	// fmt.Printf("  compile index for prefix=%v\n", b.prefix)

	bytes := make([]byte, scratchBytes.FilePointer())
	assert(len(bytes) > 0)
	err = scratchBytes.WriteToBytes(bytes)
	if err != nil {
		return err
	}
	err = indexBuilder.Add(fst.ToIntsRef(b.prefix, scratchIntsRef), bytes)
	if err != nil {
		return err
	}
	scratchBytes.Reset()

	// copy over index for all sub-blocks
	for _, block := range blocks {
		if block.subIndices != nil {
			for _, subIndex := range block.subIndices {
				if err = b.append(indexBuilder, subIndex, scratchIntsRef); err != nil {
					return err
				}
			}
		}
		block.subIndices = nil
	}

	if b.index, err = indexBuilder.Finish(); err != nil {
		return err
	}
	assert(b.subIndices == nil)
	return nil
}
Пример #5
0
package blocktree

import (
	"fmt"
	. "github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
	"github.com/balzaczyy/golucene/core/util"
	"github.com/balzaczyy/golucene/core/util/fst"
	"sort"
	// "strconv"
)

// blocktree/SegmentTermsEnum.java

var fstOutputs = fst.ByteSequenceOutputsSingleton()
var noOutput = fstOutputs.NoOutput()

// Iterates through terms in this field
type SegmentTermsEnum struct {
	*TermsEnumImpl

	// lazy init:
	in store.IndexInput

	stack        []*segmentTermsEnumFrame
	staticFrame  *segmentTermsEnumFrame
	currentFrame *segmentTermsEnumFrame
	termExists   bool
	fr           *FieldReader

	targetBeforeCurrentLength int