func newSegmentTermsEnum(r *FieldReader) *SegmentTermsEnum { ans := &SegmentTermsEnum{ FieldReader: r, stack: make([]*segmentTermsEnumFrame, 0), scratchReader: store.NewEmptyByteArrayDataInput(), term: newBytesRef(), arcs: make([]*fst.Arc, 1), fstOutputs: fst.ByteSequenceOutputsSingleton(), } ans.TermsEnumImpl = newTermsEnumImpl(ans) log.Printf("BTTR.init seg=%v", r.segment) // Used to hold seek by TermState, or cached seek ans.staticFrame = newFrame(ans, -1) if r.index != nil { ans.fstReader = r.index.BytesReader() } // Init w/ root block; don't use index since it may // not (and need not) have been loaded for i, _ := range ans.arcs { ans.arcs[i] = &fst.Arc{} } ans.currentFrame = ans.staticFrame var arc *fst.Arc if r.index != nil { arc = r.index.FirstArc(ans.arcs[0]) // Empty string prefix must have an output in the index! if !arc.IsFinal() { panic("assert fail") } } ans.currentFrame = ans.staticFrame ans.validIndexPrefix = 0 log.Printf("init frame state %v", ans.currentFrame.ord) ans.printSeekState() // ans.computeBlockStats() return ans }
func newFieldReader(parent *BlockTreeTermsReader, fieldInfo *FieldInfo, numTerms int64, rootCode []byte, sumTotalTermFreq, sumDocFreq int64, docCount int, indexStartFP int64, longsSize int, indexIn store.IndexInput, minTerm, maxTerm []byte) (r FieldReader, err error) { // log.Print("Initializing FieldReader...") assert(numTerms > 0) r = FieldReader{ parent: parent, fieldInfo: fieldInfo, numTerms: numTerms, sumTotalTermFreq: sumTotalTermFreq, sumDocFreq: sumDocFreq, docCount: docCount, indexStartFP: indexStartFP, rootCode: rootCode, longsSize: longsSize, minTerm: minTerm, maxTerm: maxTerm, } // log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=", // parent.segment, fieldInfo.Name, rootCode) in := store.NewByteArrayDataInput(rootCode) n, err := in.ReadVLong() if err != nil { return r, err } r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS) if indexIn != nil { clone := indexIn.Clone() // log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name) clone.Seek(indexStartFP) r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton()) } return r, err }
func newFieldReader(owner *BlockTreeTermsReader, fieldInfo model.FieldInfo, numTerms int64, rootCode []byte, sumTotalTermFreq, sumDocFreq int64, docCount int32, indexStartFP int64, indexIn store.IndexInput) (r FieldReader, err error) { log.Print("Initializing FieldReader...") if numTerms <= 0 { panic("assert fail") } // assert numTerms > 0 r = FieldReader{ BlockTreeTermsReader: owner, fieldInfo: fieldInfo, numTerms: numTerms, sumTotalTermFreq: sumTotalTermFreq, sumDocFreq: sumDocFreq, docCount: docCount, indexStartFP: indexStartFP, rootCode: rootCode, } log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=", owner.segment, fieldInfo.Name, rootCode) in := store.NewByteArrayDataInput(rootCode) n, err := in.ReadVLong() if err != nil { return r, err } r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS) if indexIn != nil { clone := indexIn.Clone() log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name) clone.Seek(indexStartFP) r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton()) } return r, err }
func (b *PendingBlock) compileIndex(blocks []*PendingBlock, scratchBytes *store.RAMOutputStream, scratchIntsRef *util.IntsRefBuilder) (err error) { assert2(b.isFloor && len(blocks) > 1 || (!b.isFloor && len(blocks) == 1), "isFloor=%v blocks=%v", b.isFloor, blocks) assert(blocks[0] == b) assert(scratchBytes.FilePointer() == 0) // TODO: try writing the leading vLong in MSB order // (opposite of what Lucene does today), for better // outputs sharing in the FST if err = scratchBytes.WriteVLong(encodeOutput(b.fp, b.hasTerms, b.isFloor)); err != nil { return } if b.isFloor { if err = scratchBytes.WriteVInt(int32(len(blocks) - 1)); err != nil { return } for _, sub := range blocks[1:] { assert(sub.floorLeadByte != -1) // fmt.Printf(" write floorLeadByte=%v\n", util.ItoHex(int64(sub.floorLeadByte))) if err = scratchBytes.WriteByte(byte(sub.floorLeadByte)); err != nil { return } assert(sub.fp > b.fp) if err = scratchBytes.WriteVLong((sub.fp-b.fp)<<1 | int64(map[bool]int{true: 1, false: 0}[sub.hasTerms])); err != nil { return } } } outputs := fst.ByteSequenceOutputsSingleton() indexBuilder := fst.NewBuilder(fst.INPUT_TYPE_BYTE1, 0, 0, true, false, int(math.MaxInt32), outputs, false, packed.PackedInts.COMPACT, true, 15) // fmt.Printf(" compile index for prefix=%v\n", b.prefix) bytes := make([]byte, scratchBytes.FilePointer()) assert(len(bytes) > 0) err = scratchBytes.WriteToBytes(bytes) if err != nil { return err } err = indexBuilder.Add(fst.ToIntsRef(b.prefix, scratchIntsRef), bytes) if err != nil { return err } scratchBytes.Reset() // copy over index for all sub-blocks for _, block := range blocks { if block.subIndices != nil { for _, subIndex := range block.subIndices { if err = b.append(indexBuilder, subIndex, scratchIntsRef); err != nil { return err } } } block.subIndices = nil } if b.index, err = indexBuilder.Finish(); err != nil { return err } assert(b.subIndices == nil) return nil }
package blocktree import ( "fmt" . "github.com/balzaczyy/golucene/core/index/model" "github.com/balzaczyy/golucene/core/store" "github.com/balzaczyy/golucene/core/util" "github.com/balzaczyy/golucene/core/util/fst" "sort" // "strconv" ) // blocktree/SegmentTermsEnum.java var fstOutputs = fst.ByteSequenceOutputsSingleton() var noOutput = fstOutputs.NoOutput() // Iterates through terms in this field type SegmentTermsEnum struct { *TermsEnumImpl // lazy init: in store.IndexInput stack []*segmentTermsEnumFrame staticFrame *segmentTermsEnumFrame currentFrame *segmentTermsEnumFrame termExists bool fr *FieldReader targetBeforeCurrentLength int